Ruby 3.3.5p100 (2024-09-03 revision ef084cc8f4958c1b6e4ead99136631bef6d8ddba)
prism.c
1#include "prism.h"
2
6const char *
7pm_version(void) {
8 return PRISM_VERSION;
9}
10
// NOTE(review): presumably the number of columns a tab character counts for
// when measuring whitespace; the use sites are not visible here — confirm.
15#define PM_TAB_WHITESPACE_SIZE 8
16
// Compile-time switch for the debug helpers below. It defaults to 0 (disabled)
// unless the build has already defined PM_DEBUG_LOGGING.
17#ifndef PM_DEBUG_LOGGING
23#define PM_DEBUG_LOGGING 0
24#endif
25
26#if PM_DEBUG_LOGGING
27
28/******************************************************************************/
29/* Debugging */
30/******************************************************************************/
31
32PRISM_ATTRIBUTE_UNUSED static const char *
33debug_context(pm_context_t context) {
34 switch (context) {
35 case PM_CONTEXT_BEGIN: return "BEGIN";
36 case PM_CONTEXT_CLASS: return "CLASS";
37 case PM_CONTEXT_CASE_IN: return "CASE_IN";
38 case PM_CONTEXT_CASE_WHEN: return "CASE_WHEN";
39 case PM_CONTEXT_DEF: return "DEF";
40 case PM_CONTEXT_DEF_PARAMS: return "DEF_PARAMS";
41 case PM_CONTEXT_DEFAULT_PARAMS: return "DEFAULT_PARAMS";
42 case PM_CONTEXT_ENSURE: return "ENSURE";
43 case PM_CONTEXT_ENSURE_DEF: return "ENSURE_DEF";
44 case PM_CONTEXT_ELSE: return "ELSE";
45 case PM_CONTEXT_ELSIF: return "ELSIF";
46 case PM_CONTEXT_EMBEXPR: return "EMBEXPR";
47 case PM_CONTEXT_BLOCK_BRACES: return "BLOCK_BRACES";
48 case PM_CONTEXT_BLOCK_KEYWORDS: return "BLOCK_KEYWORDS";
49 case PM_CONTEXT_FOR: return "FOR";
50 case PM_CONTEXT_FOR_INDEX: return "FOR_INDEX";
51 case PM_CONTEXT_IF: return "IF";
52 case PM_CONTEXT_MAIN: return "MAIN";
53 case PM_CONTEXT_MODULE: return "MODULE";
54 case PM_CONTEXT_PARENS: return "PARENS";
55 case PM_CONTEXT_POSTEXE: return "POSTEXE";
56 case PM_CONTEXT_PREDICATE: return "PREDICATE";
57 case PM_CONTEXT_PREEXE: return "PREEXE";
58 case PM_CONTEXT_RESCUE: return "RESCUE";
59 case PM_CONTEXT_RESCUE_ELSE: return "RESCUE_ELSE";
60 case PM_CONTEXT_RESCUE_ELSE_DEF: return "RESCUE_ELSE_DEF";
61 case PM_CONTEXT_RESCUE_DEF: return "RESCUE_DEF";
62 case PM_CONTEXT_SCLASS: return "SCLASS";
63 case PM_CONTEXT_UNLESS: return "UNLESS";
64 case PM_CONTEXT_UNTIL: return "UNTIL";
65 case PM_CONTEXT_WHILE: return "WHILE";
66 case PM_CONTEXT_LAMBDA_BRACES: return "LAMBDA_BRACES";
67 case PM_CONTEXT_LAMBDA_DO_END: return "LAMBDA_DO_END";
68 }
69 return NULL;
70}
71
72PRISM_ATTRIBUTE_UNUSED static void
73debug_contexts(pm_parser_t *parser) {
74 pm_context_node_t *context_node = parser->current_context;
75 fprintf(stderr, "CONTEXTS: ");
76
77 if (context_node != NULL) {
78 while (context_node != NULL) {
79 fprintf(stderr, "%s", debug_context(context_node->context));
80 context_node = context_node->prev;
81 if (context_node != NULL) {
82 fprintf(stderr, " <- ");
83 }
84 }
85 } else {
86 fprintf(stderr, "NONE");
87 }
88
89 fprintf(stderr, "\n");
90}
91
92PRISM_ATTRIBUTE_UNUSED static void
93debug_node(const pm_parser_t *parser, const pm_node_t *node) {
94 pm_buffer_t output_buffer = { 0 };
95 pm_prettyprint(&output_buffer, parser, node);
96
97 fprintf(stderr, "%.*s", (int) output_buffer.length, output_buffer.value);
98 pm_buffer_free(&output_buffer);
99}
100
101PRISM_ATTRIBUTE_UNUSED static void
102debug_lex_mode(pm_parser_t *parser) {
103 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
104 bool first = true;
105
106 while (lex_mode != NULL) {
107 if (first) {
108 first = false;
109 } else {
110 fprintf(stderr, " <- ");
111 }
112
113 switch (lex_mode->mode) {
114 case PM_LEX_DEFAULT: fprintf(stderr, "DEFAULT"); break;
115 case PM_LEX_EMBEXPR: fprintf(stderr, "EMBEXPR"); break;
116 case PM_LEX_EMBVAR: fprintf(stderr, "EMBVAR"); break;
117 case PM_LEX_HEREDOC: fprintf(stderr, "HEREDOC"); break;
118 case PM_LEX_LIST: fprintf(stderr, "LIST (terminator=%c, interpolation=%d)", lex_mode->as.list.terminator, lex_mode->as.list.interpolation); break;
119 case PM_LEX_REGEXP: fprintf(stderr, "REGEXP (terminator=%c)", lex_mode->as.regexp.terminator); break;
120 case PM_LEX_STRING: fprintf(stderr, "STRING (terminator=%c, interpolation=%d)", lex_mode->as.string.terminator, lex_mode->as.string.interpolation); break;
121 }
122
123 lex_mode = lex_mode->prev;
124 }
125
126 fprintf(stderr, "\n");
127}
128
129PRISM_ATTRIBUTE_UNUSED static void
130debug_state(pm_parser_t *parser) {
131 fprintf(stderr, "STATE: ");
132 bool first = true;
133
134 if (parser->lex_state == PM_LEX_STATE_NONE) {
135 fprintf(stderr, "NONE\n");
136 return;
137 }
138
139#define CHECK_STATE(state) \
140 if (parser->lex_state & state) { \
141 if (!first) fprintf(stderr, "|"); \
142 fprintf(stderr, "%s", #state); \
143 first = false; \
144 }
145
146 CHECK_STATE(PM_LEX_STATE_BEG)
147 CHECK_STATE(PM_LEX_STATE_END)
148 CHECK_STATE(PM_LEX_STATE_ENDARG)
149 CHECK_STATE(PM_LEX_STATE_ENDFN)
150 CHECK_STATE(PM_LEX_STATE_ARG)
151 CHECK_STATE(PM_LEX_STATE_CMDARG)
152 CHECK_STATE(PM_LEX_STATE_MID)
153 CHECK_STATE(PM_LEX_STATE_FNAME)
154 CHECK_STATE(PM_LEX_STATE_DOT)
155 CHECK_STATE(PM_LEX_STATE_CLASS)
156 CHECK_STATE(PM_LEX_STATE_LABEL)
157 CHECK_STATE(PM_LEX_STATE_LABELED)
158 CHECK_STATE(PM_LEX_STATE_FITEM)
159
160#undef CHECK_STATE
161
162 fprintf(stderr, "\n");
163}
164
165PRISM_ATTRIBUTE_UNUSED static void
166debug_token(pm_token_t * token) {
167 fprintf(stderr, "%s: \"%.*s\"\n", pm_token_type_to_str(token->type), (int) (token->end - token->start), token->start);
168}
169
170#endif
171
172// Macros for min/max.
// Smaller / larger of two values. Both are macros, so each argument may be
// evaluated more than once; do not pass expressions with side effects.
#define MIN(a, b) ((b) > (a) ? (a) : (b))
#define MAX(a, b) ((b) < (a) ? (a) : (b))
175
176/******************************************************************************/
177/* Lex mode manipulations */
178/******************************************************************************/
179
/**
 * Return the character that increases nesting for a given opening delimiter.
 *
 * @param start The opening delimiter.
 * @return start itself when it is one of ( [ { <, otherwise '\0'.
 */
static inline uint8_t
lex_mode_incrementor(const uint8_t start) {
    const bool is_bracket = (start == '(') || (start == '[') || (start == '{') || (start == '<');
    return is_bracket ? start : '\0';
}
196
/**
 * Return the closing delimiter that matches a given opening delimiter.
 *
 * @param start The opening delimiter.
 * @return The matching closer for ( [ { <; any other character is returned
 *     unchanged.
 */
static inline uint8_t
lex_mode_terminator(const uint8_t start) {
    if (start == '(') return ')';
    if (start == '[') return ']';
    if (start == '{') return '}';
    if (start == '<') return '>';
    return start;
}
216
222static bool
223lex_mode_push(pm_parser_t *parser, pm_lex_mode_t lex_mode) {
224 lex_mode.prev = parser->lex_modes.current;
225 parser->lex_modes.index++;
226
227 if (parser->lex_modes.index > PM_LEX_STACK_SIZE - 1) {
228 parser->lex_modes.current = (pm_lex_mode_t *) malloc(sizeof(pm_lex_mode_t));
229 if (parser->lex_modes.current == NULL) return false;
230
231 *parser->lex_modes.current = lex_mode;
232 } else {
233 parser->lex_modes.stack[parser->lex_modes.index] = lex_mode;
234 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
235 }
236
237 return true;
238}
239
243static inline bool
244lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
245 uint8_t incrementor = lex_mode_incrementor(delimiter);
246 uint8_t terminator = lex_mode_terminator(delimiter);
247
248 pm_lex_mode_t lex_mode = {
249 .mode = PM_LEX_LIST,
250 .as.list = {
251 .nesting = 0,
252 .interpolation = interpolation,
253 .incrementor = incrementor,
254 .terminator = terminator
255 }
256 };
257
258 // These are the places where we need to split up the content of the list.
259 // We'll use strpbrk to find the first of these characters.
260 uint8_t *breakpoints = lex_mode.as.list.breakpoints;
261 memcpy(breakpoints, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints));
262
263 // Now we'll add the terminator to the list of breakpoints.
264 size_t index = 7;
265 breakpoints[index++] = terminator;
266
267 // If interpolation is allowed, then we're going to check for the #
268 // character. Otherwise we'll only look for escapes and the terminator.
269 if (interpolation) {
270 breakpoints[index++] = '#';
271 }
272
273 // If there is an incrementor, then we'll check for that as well.
274 if (incrementor != '\0') {
275 breakpoints[index++] = incrementor;
276 }
277
278 parser->explicit_encoding = NULL;
279 return lex_mode_push(parser, lex_mode);
280}
281
287static inline bool
288lex_mode_push_list_eof(pm_parser_t *parser) {
289 return lex_mode_push_list(parser, false, '\0');
290}
291
295static inline bool
296lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminator) {
297 pm_lex_mode_t lex_mode = {
298 .mode = PM_LEX_REGEXP,
299 .as.regexp = {
300 .nesting = 0,
301 .incrementor = incrementor,
302 .terminator = terminator
303 }
304 };
305
306 // These are the places where we need to split up the content of the
307 // regular expression. We'll use strpbrk to find the first of these
308 // characters.
309 uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
310 memcpy(breakpoints, "\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
311
312 // First we'll add the terminator.
313 breakpoints[3] = terminator;
314
315 // Next, if there is an incrementor, then we'll check for that as well.
316 if (incrementor != '\0') {
317 breakpoints[4] = incrementor;
318 }
319
320 return lex_mode_push(parser, lex_mode);
321}
322
326static inline bool
327lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) {
328 pm_lex_mode_t lex_mode = {
329 .mode = PM_LEX_STRING,
330 .as.string = {
331 .nesting = 0,
332 .interpolation = interpolation,
333 .label_allowed = label_allowed,
334 .incrementor = incrementor,
335 .terminator = terminator
336 }
337 };
338
339 // These are the places where we need to split up the content of the
340 // string. We'll use strpbrk to find the first of these characters.
341 uint8_t *breakpoints = lex_mode.as.string.breakpoints;
342 memcpy(breakpoints, "\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints));
343
344 // Now add in the terminator.
345 size_t index = 2;
346 breakpoints[index++] = terminator;
347
348 // If interpolation is allowed, then we're going to check for the #
349 // character. Otherwise we'll only look for escapes and the terminator.
350 if (interpolation) {
351 breakpoints[index++] = '#';
352 }
353
354 // If we have an incrementor, then we'll add that in as a breakpoint as
355 // well.
356 if (incrementor != '\0') {
357 breakpoints[index++] = incrementor;
358 }
359
360 parser->explicit_encoding = NULL;
361 return lex_mode_push(parser, lex_mode);
362}
363
369static inline bool
370lex_mode_push_string_eof(pm_parser_t *parser) {
371 return lex_mode_push_string(parser, false, false, '\0', '\0');
372}
373
379static void
380lex_mode_pop(pm_parser_t *parser) {
381 if (parser->lex_modes.index == 0) {
382 parser->lex_modes.current->mode = PM_LEX_DEFAULT;
383 } else if (parser->lex_modes.index < PM_LEX_STACK_SIZE) {
384 parser->lex_modes.index--;
385 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
386 } else {
387 parser->lex_modes.index--;
388 pm_lex_mode_t *prev = parser->lex_modes.current->prev;
389 free(parser->lex_modes.current);
390 parser->lex_modes.current = prev;
391 }
392}
393
397static inline bool
398lex_state_p(pm_parser_t *parser, pm_lex_state_t state) {
399 return parser->lex_state & state;
400}
401
/**
 * The result of lex_state_ignored_p: how a newline is classified for the
 * current lex state.
 */
typedef enum {
    PM_IGNORED_NEWLINE_NONE = 0,
    PM_IGNORED_NEWLINE_ALL = 1,
    PM_IGNORED_NEWLINE_PATTERN = 2
} pm_ignored_newline_type_t;
407
408static inline pm_ignored_newline_type_t
409lex_state_ignored_p(pm_parser_t *parser) {
410 bool ignored = lex_state_p(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_CLASS | PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT) && !lex_state_p(parser, PM_LEX_STATE_LABELED);
411
412 if (ignored) {
413 return PM_IGNORED_NEWLINE_ALL;
414 } else if ((parser->lex_state & ~((unsigned int) PM_LEX_STATE_LABEL)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) {
415 return PM_IGNORED_NEWLINE_PATTERN;
416 } else {
417 return PM_IGNORED_NEWLINE_NONE;
418 }
419}
420
421static inline bool
422lex_state_beg_p(pm_parser_t *parser) {
423 return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
424}
425
426static inline bool
427lex_state_arg_p(pm_parser_t *parser) {
428 return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
429}
430
431static inline bool
432lex_state_spcarg_p(pm_parser_t *parser, bool space_seen) {
433 if (parser->current.end >= parser->end) {
434 return false;
435 }
436 return lex_state_arg_p(parser) && space_seen && !pm_char_is_whitespace(*parser->current.end);
437}
438
439static inline bool
440lex_state_end_p(pm_parser_t *parser) {
441 return lex_state_p(parser, PM_LEX_STATE_END_ANY);
442}
443
447static inline bool
448lex_state_operator_p(pm_parser_t *parser) {
449 return lex_state_p(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT);
450}
451
456static inline void
457lex_state_set(pm_parser_t *parser, pm_lex_state_t state) {
458 parser->lex_state = state;
459}
460
461#if PM_DEBUG_LOGGING
462static inline void
463debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * caller_name, int line_number) {
464 fprintf(stderr, "Caller: %s:%d\nPrevious: ", caller_name, line_number);
465 debug_state(parser);
466 lex_state_set(parser, state);
467 fprintf(stderr, "Now: ");
468 debug_state(parser);
469 fprintf(stderr, "\n");
470}
471
472#define lex_state_set(parser, state) debug_lex_state_set(parser, state, __func__, __LINE__)
473#endif
474
475/******************************************************************************/
476/* Diagnostic-related functions */
477/******************************************************************************/
478
482static inline void
483pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
484 pm_diagnostic_list_append(&parser->error_list, start, end, diag_id);
485}
486
// Append a formatted error diagnostic for the range [start, end) to the
// parser's error list; the trailing arguments feed the diagnostic's format.
// The parser argument is parenthesized so that any pointer expression can be
// passed safely.
#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) pm_diagnostic_list_append_format(&(parser)->error_list, start, end, diag_id, __VA_ARGS__)
491
496static inline void
497pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
498 pm_parser_err(parser, parser->current.start, parser->current.end, diag_id);
499}
500
// Append a formatted error diagnostic covering the given location (a pointer
// with start/end members) to the parser's error list. Both pointer arguments
// are parenthesized so that any pointer expression can be passed safely.
#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) pm_diagnostic_list_append_format(&(parser)->error_list, (location)->start, (location)->end, diag_id, __VA_ARGS__)
506
511static inline void
512pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
513 pm_parser_err(parser, node->location.start, node->location.end, diag_id);
514}
515
// Append a formatted error diagnostic covering the given node's location to
// the parser's error list. Both pointer arguments are parenthesized so that
// any pointer expression (for example a cast) can be passed safely.
#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) pm_diagnostic_list_append_format(&(parser)->error_list, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
521
526static inline void
527pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
528 pm_parser_err(parser, parser->previous.start, parser->previous.end, diag_id);
529}
530
535static inline void
536pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
537 pm_parser_err(parser, token->start, token->end, diag_id);
538}
539
// Append a formatted error diagnostic covering the given token (passed by
// value, not by pointer) to the parser's error list. The parser argument is
// parenthesized so that any pointer expression can be passed safely.
#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) pm_diagnostic_list_append_format(&(parser)->error_list, (token).start, (token).end, diag_id, __VA_ARGS__)
545
549static inline void
550pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
551 if (!parser->suppress_warnings) {
552 pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
553 }
554}
555
560static inline void
561pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
562 pm_parser_warn(parser, token->start, token->end, diag_id);
563}
564
565/******************************************************************************/
566/* Node-related functions */
567/******************************************************************************/
568
572static inline pm_constant_id_t
573pm_parser_constant_id_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
574 return pm_constant_pool_insert_shared(&parser->constant_pool, start, (size_t) (end - start));
575}
576
580static inline pm_constant_id_t
581pm_parser_constant_id_owned(pm_parser_t *parser, const uint8_t *start, size_t length) {
582 return pm_constant_pool_insert_owned(&parser->constant_pool, start, length);
583}
584
588static inline pm_constant_id_t
589pm_parser_constant_id_constant(pm_parser_t *parser, const char *start, size_t length) {
590 return pm_constant_pool_insert_constant(&parser->constant_pool, (const uint8_t *) start, length);
591}
592
596static inline pm_constant_id_t
597pm_parser_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
598 return pm_parser_constant_id_location(parser, token->start, token->end);
599}
600
605static inline pm_constant_id_t
606pm_parser_optional_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
607 return token->type == PM_TOKEN_NOT_PROVIDED ? 0 : pm_parser_constant_id_token(parser, token);
608}
609
// Walk down an expression looking for a node that cannot produce a value
// (return, break, next, redo, retry). Returns that node, or the first such
// node remembered from an if/unless branch, or NULL when none is found.
//
// NOTE(review): the embedded source numbering skips 626, 628, 636 and 641 in
// this function, so some lines appear to have been lost in extraction. As
// shown, the second `return NULL` below is unreachable and `cast` is used
// without a visible declaration in the PARENTHESES and STATEMENTS cases.
// Confirm against the upstream file before changing any logic here.
615static pm_node_t*
616pm_check_value_expression(pm_node_t *node) {
// First void node found in an if/unless branch; reported in preference to a
// later one.
617 pm_node_t* void_node = NULL;
618
619 while (node != NULL) {
620 switch (PM_NODE_TYPE(node)) {
621 case PM_RETURN_NODE:
622 case PM_BREAK_NODE:
623 case PM_NEXT_NODE:
624 case PM_REDO_NODE:
625 case PM_RETRY_NODE:
627 return void_node != NULL ? void_node : node;
629 return NULL;
// begin: continue with its statements.
630 case PM_BEGIN_NODE: {
631 pm_begin_node_t *cast = (pm_begin_node_t *) node;
632 node = (pm_node_t *) cast->statements;
633 break;
634 }
// Parentheses: continue with the body.
635 case PM_PARENTHESES_NODE: {
637 node = (pm_node_t *) cast->body;
638 break;
639 }
// Statements: only the last statement is inspected.
// NOTE(review): this indexes size - 1 without an emptiness check; presumably
// callers never produce an empty statements node — confirm.
640 case PM_STATEMENTS_NODE: {
642 node = cast->body.nodes[cast->body.size - 1];
643 break;
644 }
// if: with either branch missing the result is NULL. Otherwise the statements
// branch is checked recursively; if it is void, the loop continues into the
// consequent.
645 case PM_IF_NODE: {
646 pm_if_node_t *cast = (pm_if_node_t *) node;
647 if (cast->statements == NULL || cast->consequent == NULL) {
648 return NULL;
649 }
650 pm_node_t *vn = pm_check_value_expression((pm_node_t *) cast->statements);
651 if (vn == NULL) {
652 return NULL;
653 }
654 if (void_node == NULL) {
655 void_node = vn;
656 }
657 node = cast->consequent;
658 break;
659 }
// unless: same treatment as if.
660 case PM_UNLESS_NODE: {
661 pm_unless_node_t *cast = (pm_unless_node_t *) node;
662 if (cast->statements == NULL || cast->consequent == NULL) {
663 return NULL;
664 }
665 pm_node_t *vn = pm_check_value_expression((pm_node_t *) cast->statements);
666 if (vn == NULL) {
667 return NULL;
668 }
669 if (void_node == NULL) {
670 void_node = vn;
671 }
672 node = (pm_node_t *) cast->consequent;
673 break;
674 }
// else: continue with its statements.
675 case PM_ELSE_NODE: {
676 pm_else_node_t *cast = (pm_else_node_t *) node;
677 node = (pm_node_t *) cast->statements;
678 break;
679 }
// and / or: only the left operand is inspected.
680 case PM_AND_NODE: {
681 pm_and_node_t *cast = (pm_and_node_t *) node;
682 node = cast->left;
683 break;
684 }
685 case PM_OR_NODE: {
686 pm_or_node_t *cast = (pm_or_node_t *) node;
687 node = cast->left;
688 break;
689 }
// Every other node type is treated as producing a value.
690 default:
691 return NULL;
692 }
693 }
694
695 return NULL;
696}
697
698static inline void
699pm_assert_value_expression(pm_parser_t *parser, pm_node_t *node) {
700 pm_node_t *void_node = pm_check_value_expression(node);
701 if (void_node != NULL) {
702 pm_parser_err_node(parser, void_node, PM_ERR_VOID_EXPRESSION);
703 }
704}
705
// Rewrite a node that is used as a conditional predicate. The function
// recurses through and/or operands and single-statement parentheses, and
// retypes range nodes in place as flip-flop nodes.
//
// NOTE(review): the embedded source numbering skips 731, 759, 763-764, 766
// and 770-771 in this function, so some lines appear to have been lost in
// extraction: `cast` has no visible declaration in the PARENTHESES case, and
// the two comment blocks near the end have lost their case labels and
// statements. Confirm against the upstream file before changing any logic.
715static void
716pm_conditional_predicate(pm_node_t *node) {
717 switch (PM_NODE_TYPE(node)) {
// and / or: both operands are themselves predicates.
718 case PM_AND_NODE: {
719 pm_and_node_t *cast = (pm_and_node_t *) node;
720 pm_conditional_predicate(cast->left);
721 pm_conditional_predicate(cast->right);
722 break;
723 }
724 case PM_OR_NODE: {
725 pm_or_node_t *cast = (pm_or_node_t *) node;
726 pm_conditional_predicate(cast->left);
727 pm_conditional_predicate(cast->right);
728 break;
729 }
// Parentheses: only recurse when the body is a statements node holding exactly
// one statement.
730 case PM_PARENTHESES_NODE: {
732
733 if ((cast->body != NULL) && PM_NODE_TYPE_P(cast->body, PM_STATEMENTS_NODE)) {
734 pm_statements_node_t *statements = (pm_statements_node_t *) cast->body;
735 if (statements->body.size == 1) pm_conditional_predicate(statements->body.nodes[0]);
736 }
737
738 break;
739 }
// Range: process whichever endpoints are present, then retype the node.
740 case PM_RANGE_NODE: {
741 pm_range_node_t *cast = (pm_range_node_t *) node;
742 if (cast->left) {
743 pm_conditional_predicate(cast->left);
744 }
745 if (cast->right) {
746 pm_conditional_predicate(cast->right);
747 }
748
749 // Here we change the range node into a flip flop node. We can do
750 // this since the nodes are exactly the same except for the type.
751 // We're only asserting against the size when we should probably
752 // assert against the entire layout, but we'll assume tests will
753 // catch this.
754 assert(sizeof(pm_range_node_t) == sizeof(pm_flip_flop_node_t));
755 node->type = PM_FLIP_FLOP_NODE;
756
757 break;
758 }
760 // Here we change the regular expression node into a match last line
761 // node. We can do this since the nodes are exactly the same except
762 // for the type.
765 break;
767 // Here we change the interpolated regular expression node into an
768 // interpolated match last line node. We can do this since the nodes
769 // are exactly the same except for the type.
772 break;
// All other node types are left untouched.
773 default:
774 break;
775 }
776}
777
786static inline pm_token_t
787not_provided(pm_parser_t *parser) {
788 return (pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
789}
790
// Helpers that build pm_location_t values. Pointer arguments are parenthesized
// so that any pointer expression can be passed safely.

// An empty location at the very start of the parser's source.
#define PM_LOCATION_NULL_VALUE(parser) ((pm_location_t) { .start = (parser)->start, .end = (parser)->start })
// The location spanned by a token (pointer).
#define PM_LOCATION_TOKEN_VALUE(token) ((pm_location_t) { .start = (token)->start, .end = (token)->end })
// The location of a node accessed through a pm_node_t pointer.
#define PM_LOCATION_NODE_VALUE(node) ((pm_location_t) { .start = (node)->location.start, .end = (node)->location.end })
// The location of a node accessed through a pointer to a struct with a `base` member.
#define PM_LOCATION_NODE_BASE_VALUE(node) ((pm_location_t) { .start = (node)->base.location.start, .end = (node)->base.location.end })
// The "absent" optional location: both pointers are NULL.
#define PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE ((pm_location_t) { .start = NULL, .end = NULL })
// The token's location, or the absent location when the token was not provided.
#define PM_OPTIONAL_LOCATION_TOKEN_VALUE(token) ((token)->type == PM_TOKEN_NOT_PROVIDED ? PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE : PM_LOCATION_TOKEN_VALUE(token))
797
817
821static inline const uint8_t *
822pm_arguments_end(pm_arguments_t *arguments) {
823 if (arguments->block != NULL) {
824 const uint8_t *end = arguments->block->location.end;
825 if (arguments->closing_loc.start != NULL && arguments->closing_loc.end > end) {
826 end = arguments->closing_loc.end;
827 }
828 return end;
829 }
830 if (arguments->closing_loc.start != NULL) {
831 return arguments->closing_loc.end;
832 }
833 if (arguments->arguments != NULL) {
834 return arguments->arguments->base.location.end;
835 }
836 return arguments->closing_loc.end;
837}
838
843static void
844pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_block_node_t *block) {
845 // First, check that we have arguments and that we don't have a closing
846 // location for them.
847 if (arguments->arguments == NULL || arguments->closing_loc.start != NULL) {
848 return;
849 }
850
851 // Next, check that we don't have a single parentheses argument. This would
852 // look like:
853 //
854 // foo (1) {}
855 //
856 // In this case, it's actually okay for the block to be attached to the
857 // call, even though it looks like it's attached to the argument.
858 if (arguments->arguments->arguments.size == 1 && PM_NODE_TYPE_P(arguments->arguments->arguments.nodes[0], PM_PARENTHESES_NODE)) {
859 return;
860 }
861
862 // If we didn't hit a case before this check, then at this point we need to
863 // add a syntax error.
864 pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
865}
866
867/******************************************************************************/
868/* Node flag handling functions */
869/******************************************************************************/
870
874static inline void
875pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) {
876 node->flags |= flag;
877}
878
882static inline void
883pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
884 node->flags &= (pm_node_flags_t) ~flag;
885}
886
887
888/******************************************************************************/
889/* Node creation functions */
890/******************************************************************************/
891
897static uint32_t
898parse_decimal_number(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
899 ptrdiff_t diff = end - start;
900 assert(diff > 0 && ((unsigned long) diff < SIZE_MAX));
901 size_t length = (size_t) diff;
902
903 char *digits = calloc(length + 1, sizeof(char));
904 memcpy(digits, start, length);
905 digits[length] = '\0';
906
907 char *endptr;
908 errno = 0;
909 unsigned long value = strtoul(digits, &endptr, 10);
910
911 if ((digits == endptr) || (*endptr != '\0') || (errno == ERANGE)) {
912 pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
913 value = UINT32_MAX;
914 }
915
916 free(digits);
917
918 if (value > UINT32_MAX) {
919 pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
920 value = UINT32_MAX;
921 }
922
923 return (uint32_t) value;
924}
925
931#define PM_REGULAR_EXPRESSION_ENCODING_MASK ~(PM_REGULAR_EXPRESSION_FLAGS_EUC_JP | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J | PM_REGULAR_EXPRESSION_FLAGS_UTF_8)
932
936static inline pm_node_flags_t
937pm_regular_expression_flags_create(const pm_token_t *closing) {
938 pm_node_flags_t flags = 0;
939
940 if (closing->type == PM_TOKEN_REGEXP_END) {
941 for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) {
942 switch (*flag) {
943 case 'i': flags |= PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break;
944 case 'm': flags |= PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE; break;
945 case 'x': flags |= PM_REGULAR_EXPRESSION_FLAGS_EXTENDED; break;
946 case 'o': flags |= PM_REGULAR_EXPRESSION_FLAGS_ONCE; break;
947
948 case 'e': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_EUC_JP); break;
949 case 'n': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT); break;
950 case 's': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J); break;
951 case 'u': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_UTF_8); break;
952
953 default: assert(false && "unreachable");
954 }
955 }
956 }
957
958 return flags;
959}
960
961#undef PM_REGULAR_EXPRESSION_ENCODING_MASK
962
964pm_statements_node_create(pm_parser_t *parser);
965
966static void
967pm_statements_node_body_append(pm_statements_node_t *node, pm_node_t *statement);
968
969static size_t
970pm_statements_node_body_length(pm_statements_node_t *node);
971
976static inline void *
977pm_alloc_node(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) {
978 void *memory = calloc(1, size);
979 if (memory == NULL) {
980 fprintf(stderr, "Failed to allocate %zu bytes\n", size);
981 abort();
982 }
983 return memory;
984}
985
// Allocate a zeroed node of the given struct type through pm_alloc_node and
// cast the result. The whole expansion is parenthesized so that a postfix
// operator applied to the macro (such as ->) binds to the cast result.
#define PM_ALLOC_NODE(parser, type) ((type *) pm_alloc_node(parser, sizeof(type)))
987
991static pm_missing_node_t *
992pm_missing_node_create(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
993 pm_missing_node_t *node = PM_ALLOC_NODE(parser, pm_missing_node_t);
994 *node = (pm_missing_node_t) {{ .type = PM_MISSING_NODE, .location = { .start = start, .end = end } }};
995 return node;
996}
997
1002pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
1003 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
1005
1007 {
1009 .location = {
1010 .start = keyword->start,
1011 .end = old_name->location.end
1012 },
1013 },
1014 .new_name = new_name,
1015 .old_name = old_name,
1016 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
1017 };
1018
1019 return node;
1020}
1021
1026pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
1027 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
1028 pm_alias_method_node_t *node = PM_ALLOC_NODE(parser, pm_alias_method_node_t);
1029
1030 *node = (pm_alias_method_node_t) {
1031 {
1032 .type = PM_ALIAS_METHOD_NODE,
1033 .location = {
1034 .start = keyword->start,
1035 .end = old_name->location.end
1036 },
1037 },
1038 .new_name = new_name,
1039 .old_name = old_name,
1040 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
1041 };
1042
1043 return node;
1044}
1045
1050pm_alternation_pattern_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *right, const pm_token_t *operator) {
1051 pm_alternation_pattern_node_t *node = PM_ALLOC_NODE(parser, pm_alternation_pattern_node_t);
1052
1054 {
1056 .location = {
1057 .start = left->location.start,
1058 .end = right->location.end
1059 },
1060 },
1061 .left = left,
1062 .right = right,
1063 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
1064 };
1065
1066 return node;
1067}
1068
1072static pm_and_node_t *
1073pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
1074 pm_assert_value_expression(parser, left);
1075
1076 pm_and_node_t *node = PM_ALLOC_NODE(parser, pm_and_node_t);
1077
1078 *node = (pm_and_node_t) {
1079 {
1080 .type = PM_AND_NODE,
1081 .location = {
1082 .start = left->location.start,
1083 .end = right->location.end
1084 },
1085 },
1086 .left = left,
1087 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
1088 .right = right
1089 };
1090
1091 return node;
1092}
1093
1097static pm_arguments_node_t *
1098pm_arguments_node_create(pm_parser_t *parser) {
1099 pm_arguments_node_t *node = PM_ALLOC_NODE(parser, pm_arguments_node_t);
1100
1101 *node = (pm_arguments_node_t) {
1102 {
1103 .type = PM_ARGUMENTS_NODE,
1104 .location = PM_LOCATION_NULL_VALUE(parser)
1105 },
1106 .arguments = { 0 }
1107 };
1108
1109 return node;
1110}
1111
1115static size_t
1116pm_arguments_node_size(pm_arguments_node_t *node) {
1117 return node->arguments.size;
1118}
1119
1123static void
1124pm_arguments_node_arguments_append(pm_arguments_node_t *node, pm_node_t *argument) {
1125 if (pm_arguments_node_size(node) == 0) {
1126 node->base.location.start = argument->location.start;
1127 }
1128
1129 node->base.location.end = argument->location.end;
1130 pm_node_list_append(&node->arguments, argument);
1131}
1132
1136static pm_array_node_t *
1137pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) {
1138 pm_array_node_t *node = PM_ALLOC_NODE(parser, pm_array_node_t);
1139
1140 *node = (pm_array_node_t) {
1141 {
1142 .type = PM_ARRAY_NODE,
1143 .flags = PM_NODE_FLAG_STATIC_LITERAL,
1144 .location = PM_LOCATION_TOKEN_VALUE(opening)
1145 },
1146 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
1147 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
1148 .elements = { 0 }
1149 };
1150
1151 return node;
1152}
1153
1157static inline size_t
1158pm_array_node_size(pm_array_node_t *node) {
1159 return node->elements.size;
1160}
1161
1165static inline void
1166pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) {
1167 if (!node->elements.size && !node->opening_loc.start) {
1168 node->base.location.start = element->location.start;
1169 }
1170
1171 pm_node_list_append(&node->elements, element);
1172 node->base.location.end = element->location.end;
1173
1174 // If the element is not a static literal, then the array is not a static
1175 // literal. Turn that flag off.
1176 if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || !PM_NODE_FLAG_P(element, PM_NODE_FLAG_STATIC_LITERAL)) {
1177 pm_node_flag_unset((pm_node_t *)node, PM_NODE_FLAG_STATIC_LITERAL);
1178 }
1179
1180 if (PM_NODE_TYPE_P(element, PM_SPLAT_NODE)) {
1181 pm_node_flag_set((pm_node_t *)node, PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT);
1182 }
1183}
1184
1188static void
1189pm_array_node_close_set(pm_array_node_t *node, const pm_token_t *closing) {
1190 assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == PM_TOKEN_MISSING || closing->type == PM_TOKEN_NOT_PROVIDED);
1191 node->base.location.end = closing->end;
1192 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
1193}
1194
1200pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *nodes) {
1201 pm_array_pattern_node_t *node = PM_ALLOC_NODE(parser, pm_array_pattern_node_t);
1202
1203 *node = (pm_array_pattern_node_t) {
1204 {
1205 .type = PM_ARRAY_PATTERN_NODE,
1206 .location = {
1207 .start = nodes->nodes[0]->location.start,
1208 .end = nodes->nodes[nodes->size - 1]->location.end
1209 },
1210 },
1211 .constant = NULL,
1212 .rest = NULL,
1213 .requireds = { 0 },
1214 .posts = { 0 },
1215 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
1216 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
1217 };
1218
1219 // For now we're going to just copy over each pointer manually. This could be
1220 // much more efficient, as we could instead resize the node list.
1221 bool found_rest = false;
1222 for (size_t index = 0; index < nodes->size; index++) {
1223 pm_node_t *child = nodes->nodes[index];
1224
1225 if (!found_rest && (PM_NODE_TYPE_P(child, PM_SPLAT_NODE) || PM_NODE_TYPE_P(child, PM_IMPLICIT_REST_NODE))) {
1226 node->rest = child;
1227 found_rest = true;
1228 } else if (found_rest) {
1229 pm_node_list_append(&node->posts, child);
1230 } else {
1231 pm_node_list_append(&node->requireds, child);
1232 }
1233 }
1234
1235 return node;
1236}
1237
1242pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) {
1243 pm_array_pattern_node_t *node = PM_ALLOC_NODE(parser, pm_array_pattern_node_t);
1244
1245 *node = (pm_array_pattern_node_t) {
1246 {
1247 .type = PM_ARRAY_PATTERN_NODE,
1248 .location = rest->location,
1249 },
1250 .constant = NULL,
1251 .rest = rest,
1252 .requireds = { 0 },
1253 .posts = { 0 },
1254 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
1255 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
1256 };
1257
1258 return node;
1259}
1260
1266pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant, const pm_token_t *opening, const pm_token_t *closing) {
1267 pm_array_pattern_node_t *node = PM_ALLOC_NODE(parser, pm_array_pattern_node_t);
1268
1269 *node = (pm_array_pattern_node_t) {
1270 {
1271 .type = PM_ARRAY_PATTERN_NODE,
1272 .location = {
1273 .start = constant->location.start,
1274 .end = closing->end
1275 },
1276 },
1277 .constant = constant,
1278 .rest = NULL,
1279 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
1280 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
1281 .requireds = { 0 },
1282 .posts = { 0 }
1283 };
1284
1285 return node;
1286}
1287
1293pm_array_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
1294 pm_array_pattern_node_t *node = PM_ALLOC_NODE(parser, pm_array_pattern_node_t);
1295
1296 *node = (pm_array_pattern_node_t) {
1297 {
1298 .type = PM_ARRAY_PATTERN_NODE,
1299 .location = {
1300 .start = opening->start,
1301 .end = closing->end
1302 },
1303 },
1304 .constant = NULL,
1305 .rest = NULL,
1306 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
1307 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
1308 .requireds = { 0 },
1309 .posts = { 0 }
1310 };
1311
1312 return node;
1313}
1314
1315static inline void
1316pm_array_pattern_node_requireds_append(pm_array_pattern_node_t *node, pm_node_t *inner) {
1317 pm_node_list_append(&node->requireds, inner);
1318}
1319
1323static pm_assoc_node_t *
1324pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *operator, pm_node_t *value) {
1325 pm_assoc_node_t *node = PM_ALLOC_NODE(parser, pm_assoc_node_t);
1326 const uint8_t *end;
1327
1328 if (value != NULL) {
1329 end = value->location.end;
1330 } else if (operator->type != PM_TOKEN_NOT_PROVIDED) {
1331 end = operator->end;
1332 } else {
1333 end = key->location.end;
1334 }
1335
1336 // If the key and value of this assoc node are both static literals, then
1337 // we can mark this node as a static literal.
1338 pm_node_flags_t flags = 0;
1339 if (value && !PM_NODE_TYPE_P(value, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(value, PM_HASH_NODE) && !PM_NODE_TYPE_P(value, PM_RANGE_NODE)) {
1340 flags = key->flags & value->flags & PM_NODE_FLAG_STATIC_LITERAL;
1341 }
1342
1343 *node = (pm_assoc_node_t) {
1344 {
1345 .type = PM_ASSOC_NODE,
1346 .flags = flags,
1347 .location = {
1348 .start = key->location.start,
1349 .end = end
1350 },
1351 },
1352 .key = key,
1353 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
1354 .value = value
1355 };
1356
1357 return node;
1358}
1359
1363static pm_assoc_splat_node_t *
1364pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token_t *operator) {
1365 assert(operator->type == PM_TOKEN_USTAR_STAR);
1366 pm_assoc_splat_node_t *node = PM_ALLOC_NODE(parser, pm_assoc_splat_node_t);
1367
1368 *node = (pm_assoc_splat_node_t) {
1369 {
1370 .type = PM_ASSOC_SPLAT_NODE,
1371 .location = {
1372 .start = operator->start,
1373 .end = value == NULL ? operator->end : value->location.end
1374 },
1375 },
1376 .value = value,
1377 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
1378 };
1379
1380 return node;
1381}
1382
1387pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
1388 assert(name->type == PM_TOKEN_BACK_REFERENCE);
1389 pm_back_reference_read_node_t *node = PM_ALLOC_NODE(parser, pm_back_reference_read_node_t);
1390
1392 {
1394 .location = PM_LOCATION_TOKEN_VALUE(name),
1395 },
1396 .name = pm_parser_constant_id_token(parser, name)
1397 };
1398
1399 return node;
1400}
1401
1405static pm_begin_node_t *
1406pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_statements_node_t *statements) {
1407 pm_begin_node_t *node = PM_ALLOC_NODE(parser, pm_begin_node_t);
1408
1409 *node = (pm_begin_node_t) {
1410 {
1411 .type = PM_BEGIN_NODE,
1412 .location = {
1413 .start = begin_keyword->start,
1414 .end = statements == NULL ? begin_keyword->end : statements->base.location.end
1415 },
1416 },
1417 .begin_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(begin_keyword),
1418 .statements = statements,
1419 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
1420 };
1421
1422 return node;
1423}
1424
1428static void
1429pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_clause) {
1430 // If the begin keyword doesn't exist, we set the start on the begin_node
1431 if (!node->begin_keyword_loc.start) {
1432 node->base.location.start = rescue_clause->base.location.start;
1433 }
1434 node->base.location.end = rescue_clause->base.location.end;
1435 node->rescue_clause = rescue_clause;
1436}
1437
1441static void
1442pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause) {
1443 node->base.location.end = else_clause->base.location.end;
1444 node->else_clause = else_clause;
1445}
1446
1450static void
1451pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_clause) {
1452 node->base.location.end = ensure_clause->base.location.end;
1453 node->ensure_clause = ensure_clause;
1454}
1455
1459static void
1460pm_begin_node_end_keyword_set(pm_begin_node_t *node, const pm_token_t *end_keyword) {
1461 assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == PM_TOKEN_MISSING);
1462
1463 node->base.location.end = end_keyword->end;
1464 node->end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword);
1465}
1466
1471pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
1472 pm_block_argument_node_t *node = PM_ALLOC_NODE(parser, pm_block_argument_node_t);
1473
1474 *node = (pm_block_argument_node_t) {
1475 {
1476 .type = PM_BLOCK_ARGUMENT_NODE,
1477 .location = {
1478 .start = operator->start,
1479 .end = expression == NULL ? operator->end : expression->location.end
1480 },
1481 },
1482 .expression = expression,
1483 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
1484 };
1485
1486 return node;
1487}
1488
1492static pm_block_node_t *
1493pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, uint32_t locals_body_index, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
1494 pm_block_node_t *node = PM_ALLOC_NODE(parser, pm_block_node_t);
1495
1496 *node = (pm_block_node_t) {
1497 {
1498 .type = PM_BLOCK_NODE,
1499 .location = { .start = opening->start, .end = closing->end },
1500 },
1501 .locals = *locals,
1502 .locals_body_index = locals_body_index,
1503 .parameters = parameters,
1504 .body = body,
1505 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
1506 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
1507 };
1508
1509 return node;
1510}
1511
1516pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator) {
1517 assert(operator->type == PM_TOKEN_NOT_PROVIDED || operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND);
1518 pm_block_parameter_node_t *node = PM_ALLOC_NODE(parser, pm_block_parameter_node_t);
1519
1520 *node = (pm_block_parameter_node_t) {
1521 {
1523 .location = {
1524 .start = operator->start,
1525 .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
1526 },
1527 },
1528 .name = pm_parser_optional_constant_id_token(parser, name),
1529 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
1530 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
1531 };
1532
1533 return node;
1534}
1535
1540pm_block_parameters_node_create(pm_parser_t *parser, pm_parameters_node_t *parameters, const pm_token_t *opening) {
1541 pm_block_parameters_node_t *node = PM_ALLOC_NODE(parser, pm_block_parameters_node_t);
1542
1543 const uint8_t *start;
1544 if (opening->type != PM_TOKEN_NOT_PROVIDED) {
1545 start = opening->start;
1546 } else if (parameters != NULL) {
1547 start = parameters->base.location.start;
1548 } else {
1549 start = NULL;
1550 }
1551
1552 const uint8_t *end;
1553 if (parameters != NULL) {
1554 end = parameters->base.location.end;
1555 } else if (opening->type != PM_TOKEN_NOT_PROVIDED) {
1556 end = opening->end;
1557 } else {
1558 end = NULL;
1559 }
1560
1561 *node = (pm_block_parameters_node_t) {
1562 {
1564 .location = {
1565 .start = start,
1566 .end = end
1567 }
1568 },
1569 .parameters = parameters,
1570 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
1571 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
1572 .locals = { 0 }
1573 };
1574
1575 return node;
1576}
1577
1581static void
1582pm_block_parameters_node_closing_set(pm_block_parameters_node_t *node, const pm_token_t *closing) {
1583 assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == PM_TOKEN_MISSING);
1584
1585 node->base.location.end = closing->end;
1586 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
1587}
1588
1593pm_block_local_variable_node_create(pm_parser_t *parser, const pm_token_t *name) {
1594 assert(name->type == PM_TOKEN_IDENTIFIER || name->type == PM_TOKEN_MISSING);
1596
1598 {
1600 .location = PM_LOCATION_TOKEN_VALUE(name),
1601 },
1602 .name = pm_parser_constant_id_token(parser, name)
1603 };
1604
1605 return node;
1606}
1607
1611static void
1612pm_block_parameters_node_append_local(pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) {
1613 pm_node_list_append(&node->locals, (pm_node_t *) local);
1614
1615 if (node->base.location.start == NULL) node->base.location.start = local->base.location.start;
1616 node->base.location.end = local->base.location.end;
1617}
1618
1622static pm_break_node_t *
1623pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
1624 assert(keyword->type == PM_TOKEN_KEYWORD_BREAK);
1625 pm_break_node_t *node = PM_ALLOC_NODE(parser, pm_break_node_t);
1626
1627 *node = (pm_break_node_t) {
1628 {
1629 .type = PM_BREAK_NODE,
1630 .location = {
1631 .start = keyword->start,
1632 .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
1633 },
1634 },
1635 .arguments = arguments,
1636 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
1637 };
1638
1639 return node;
1640}
1641
1647static pm_call_node_t *
1648pm_call_node_create(pm_parser_t *parser) {
1649 pm_call_node_t *node = PM_ALLOC_NODE(parser, pm_call_node_t);
1650
1651 *node = (pm_call_node_t) {
1652 {
1653 .type = PM_CALL_NODE,
1654 .location = PM_LOCATION_NULL_VALUE(parser),
1655 },
1656 .receiver = NULL,
1657 .call_operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
1658 .message_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
1659 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
1660 .arguments = NULL,
1661 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
1662 .block = NULL,
1663 .name = 0
1664 };
1665
1666 return node;
1667}
1668
1673static pm_call_node_t *
1674pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
1675 pm_assert_value_expression(parser, receiver);
1676
1677 pm_call_node_t *node = pm_call_node_create(parser);
1678
1679 node->base.location.start = receiver->location.start;
1680 node->base.location.end = pm_arguments_end(arguments);
1681
1682 node->receiver = receiver;
1683 node->message_loc.start = arguments->opening_loc.start;
1684 node->message_loc.end = arguments->closing_loc.end;
1685
1686 node->opening_loc = arguments->opening_loc;
1687 node->arguments = arguments->arguments;
1688 node->closing_loc = arguments->closing_loc;
1689 node->block = arguments->block;
1690
1691 node->name = pm_parser_constant_id_constant(parser, "[]", 2);
1692 return node;
1693}
1694
1698static pm_call_node_t *
1699pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_node_t *argument) {
1700 pm_assert_value_expression(parser, receiver);
1701 pm_assert_value_expression(parser, argument);
1702
1703 pm_call_node_t *node = pm_call_node_create(parser);
1704
1705 node->base.location.start = MIN(receiver->location.start, argument->location.start);
1706 node->base.location.end = MAX(receiver->location.end, argument->location.end);
1707
1708 node->receiver = receiver;
1709 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
1710
1711 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
1712 pm_arguments_node_arguments_append(arguments, argument);
1713 node->arguments = arguments;
1714
1715 node->name = pm_parser_constant_id_token(parser, operator);
1716 return node;
1717}
1718
1722static pm_call_node_t *
1723pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_token_t *message, pm_arguments_t *arguments) {
1724 pm_assert_value_expression(parser, receiver);
1725
1726 pm_call_node_t *node = pm_call_node_create(parser);
1727
1728 node->base.location.start = receiver->location.start;
1729 const uint8_t *end = pm_arguments_end(arguments);
1730 if (end == NULL) {
1731 end = message->end;
1732 }
1733 node->base.location.end = end;
1734
1735 node->receiver = receiver;
1736 node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
1737 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
1738 node->opening_loc = arguments->opening_loc;
1739 node->arguments = arguments->arguments;
1740 node->closing_loc = arguments->closing_loc;
1741 node->block = arguments->block;
1742
1743 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
1744 pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
1745 }
1746
1747 node->name = pm_parser_constant_id_token(parser, message);
1748 return node;
1749}
1750
1755static pm_call_node_t *
1756pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
1757 pm_call_node_t *node = pm_call_node_create(parser);
1758
1759 node->base.location.start = message->start;
1760 node->base.location.end = pm_arguments_end(arguments);
1761
1762 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
1763 node->opening_loc = arguments->opening_loc;
1764 node->arguments = arguments->arguments;
1765 node->closing_loc = arguments->closing_loc;
1766 node->block = arguments->block;
1767
1768 node->name = pm_parser_constant_id_token(parser, message);
1769 return node;
1770}
1771
1775static pm_call_node_t *
1776pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
1777 pm_assert_value_expression(parser, receiver);
1778
1779 pm_call_node_t *node = pm_call_node_create(parser);
1780
1781 node->base.location.start = message->start;
1782 if (arguments->closing_loc.start != NULL) {
1783 node->base.location.end = arguments->closing_loc.end;
1784 } else {
1785 node->base.location.end = receiver->location.end;
1786 }
1787
1788 node->receiver = receiver;
1789 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
1790 node->opening_loc = arguments->opening_loc;
1791 node->arguments = arguments->arguments;
1792 node->closing_loc = arguments->closing_loc;
1793
1794 node->name = pm_parser_constant_id_constant(parser, "!", 1);
1795 return node;
1796}
1797
1801static pm_call_node_t *
1802pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_arguments_t *arguments) {
1803 pm_assert_value_expression(parser, receiver);
1804
1805 pm_call_node_t *node = pm_call_node_create(parser);
1806
1807 node->base.location.start = receiver->location.start;
1808 node->base.location.end = pm_arguments_end(arguments);
1809
1810 node->receiver = receiver;
1811 node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
1812 node->opening_loc = arguments->opening_loc;
1813 node->arguments = arguments->arguments;
1814 node->closing_loc = arguments->closing_loc;
1815 node->block = arguments->block;
1816
1817 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
1818 pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
1819 }
1820
1821 node->name = pm_parser_constant_id_constant(parser, "call", 4);
1822 return node;
1823}
1824
1828static pm_call_node_t *
1829pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *receiver, const char *name) {
1830 pm_assert_value_expression(parser, receiver);
1831
1832 pm_call_node_t *node = pm_call_node_create(parser);
1833
1834 node->base.location.start = operator->start;
1835 node->base.location.end = receiver->location.end;
1836
1837 node->receiver = receiver;
1838 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
1839
1840 node->name = pm_parser_constant_id_constant(parser, name, strlen(name));
1841 return node;
1842}
1843
1848static pm_call_node_t *
1849pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
1850 pm_call_node_t *node = pm_call_node_create(parser);
1851
1852 node->base.location = PM_LOCATION_TOKEN_VALUE(message);
1853 node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
1854
1855 node->name = pm_parser_constant_id_token(parser, message);
1856 return node;
1857}
1858
1863static inline bool
1864pm_call_node_variable_call_p(pm_call_node_t *node) {
1866}
1867
1872static inline bool
1873pm_call_node_index_p(pm_call_node_t *node) {
1874 return (
1875 (node->call_operator_loc.start == NULL) &&
1876 (node->message_loc.start != NULL) &&
1877 (node->message_loc.start[0] == '[') &&
1878 (node->message_loc.end[-1] == ']') &&
1879 (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE))
1880 );
1881}
1882
1887static inline bool
1888pm_call_node_writable_p(pm_call_node_t *node) {
1889 return (
1890 (node->message_loc.start != NULL) &&
1891 (node->message_loc.end[-1] != '!') &&
1892 (node->message_loc.end[-1] != '?') &&
1893 (node->opening_loc.start == NULL) &&
1894 (node->arguments == NULL) &&
1895 (node->block == NULL)
1896 );
1897}
1898
1902static void
1903pm_call_write_read_name_init(pm_parser_t *parser, pm_constant_id_t *read_name, pm_constant_id_t *write_name) {
1904 pm_constant_t *write_constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *write_name);
1905
1906 if (write_constant->length > 0) {
1907 size_t length = write_constant->length - 1;
1908
1909 void *memory = malloc(length);
1910 memcpy(memory, write_constant->start, length);
1911
1912 *read_name = pm_constant_pool_insert_owned(&parser->constant_pool, (uint8_t *) memory, length);
1913 } else {
1914 // We can get here if the message was missing because of a syntax error.
1915 *read_name = pm_parser_constant_id_constant(parser, "", 0);
1916 }
1917}
1918
1923pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
1924 assert(target->block == NULL);
1925 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
1926 pm_call_and_write_node_t *node = PM_ALLOC_NODE(parser, pm_call_and_write_node_t);
1927
1928 *node = (pm_call_and_write_node_t) {
1929 {
1930 .type = PM_CALL_AND_WRITE_NODE,
1931 .flags = target->base.flags,
1932 .location = {
1933 .start = target->base.location.start,
1934 .end = value->location.end
1935 }
1936 },
1937 .receiver = target->receiver,
1938 .call_operator_loc = target->call_operator_loc,
1939 .message_loc = target->message_loc,
1940 .read_name = 0,
1941 .write_name = target->name,
1942 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
1943 .value = value
1944 };
1945
1946 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
1947
1948 // Here we're going to free the target, since it is no longer necessary.
1949 // However, we don't want to call `pm_node_destroy` because we want to keep
1950 // around all of its children since we just reused them.
1951 free(target);
1952
1953 return node;
1954}
1955
1960pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
1961 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
1962 pm_index_and_write_node_t *node = PM_ALLOC_NODE(parser, pm_index_and_write_node_t);
1963
1964 *node = (pm_index_and_write_node_t) {
1965 {
1967 .flags = target->base.flags,
1968 .location = {
1969 .start = target->base.location.start,
1970 .end = value->location.end
1971 }
1972 },
1973 .receiver = target->receiver,
1974 .call_operator_loc = target->call_operator_loc,
1975 .opening_loc = target->opening_loc,
1976 .arguments = target->arguments,
1977 .closing_loc = target->closing_loc,
1978 .block = target->block,
1979 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
1980 .value = value
1981 };
1982
1983 // Here we're going to free the target, since it is no longer necessary.
1984 // However, we don't want to call `pm_node_destroy` because we want to keep
1985 // around all of its children since we just reused them.
1986 free(target);
1987
1988 return node;
1989}
1990
1995pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
1996 assert(target->block == NULL);
1997 pm_call_operator_write_node_t *node = PM_ALLOC_NODE(parser, pm_call_operator_write_node_t);
1998
2000 {
2002 .flags = target->base.flags,
2003 .location = {
2004 .start = target->base.location.start,
2005 .end = value->location.end
2006 }
2007 },
2008 .receiver = target->receiver,
2009 .call_operator_loc = target->call_operator_loc,
2010 .message_loc = target->message_loc,
2011 .read_name = 0,
2012 .write_name = target->name,
2013 .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
2014 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2015 .value = value
2016 };
2017
2018 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
2019
2020 // Here we're going to free the target, since it is no longer necessary.
2021 // However, we don't want to call `pm_node_destroy` because we want to keep
2022 // around all of its children since we just reused them.
2023 free(target);
2024
2025 return node;
2026}
2027
2032pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2034
2036 {
2038 .flags = target->base.flags,
2039 .location = {
2040 .start = target->base.location.start,
2041 .end = value->location.end
2042 }
2043 },
2044 .receiver = target->receiver,
2045 .call_operator_loc = target->call_operator_loc,
2046 .opening_loc = target->opening_loc,
2047 .arguments = target->arguments,
2048 .closing_loc = target->closing_loc,
2049 .block = target->block,
2050 .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
2051 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2052 .value = value
2053 };
2054
2055 // Here we're going to free the target, since it is no longer necessary.
2056 // However, we don't want to call `pm_node_destroy` because we want to keep
2057 // around all of its children since we just reused them.
2058 free(target);
2059
2060 return node;
2061}
2062
2067pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2068 assert(target->block == NULL);
2069 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
2070 pm_call_or_write_node_t *node = PM_ALLOC_NODE(parser, pm_call_or_write_node_t);
2071
2072 *node = (pm_call_or_write_node_t) {
2073 {
2074 .type = PM_CALL_OR_WRITE_NODE,
2075 .flags = target->base.flags,
2076 .location = {
2077 .start = target->base.location.start,
2078 .end = value->location.end
2079 }
2080 },
2081 .receiver = target->receiver,
2082 .call_operator_loc = target->call_operator_loc,
2083 .message_loc = target->message_loc,
2084 .read_name = 0,
2085 .write_name = target->name,
2086 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2087 .value = value
2088 };
2089
2090 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
2091
2092 // Here we're going to free the target, since it is no longer necessary.
2093 // However, we don't want to call `pm_node_destroy` because we want to keep
2094 // around all of its children since we just reused them.
2095 free(target);
2096
2097 return node;
2098}
2099
2104pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2105 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
2106 pm_index_or_write_node_t *node = PM_ALLOC_NODE(parser, pm_index_or_write_node_t);
2107
2108 *node = (pm_index_or_write_node_t) {
2109 {
2110 .type = PM_INDEX_OR_WRITE_NODE,
2111 .flags = target->base.flags,
2112 .location = {
2113 .start = target->base.location.start,
2114 .end = value->location.end
2115 }
2116 },
2117 .receiver = target->receiver,
2118 .call_operator_loc = target->call_operator_loc,
2119 .opening_loc = target->opening_loc,
2120 .arguments = target->arguments,
2121 .closing_loc = target->closing_loc,
2122 .block = target->block,
2123 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2124 .value = value
2125 };
2126
2127 // Here we're going to free the target, since it is no longer necessary.
2128 // However, we don't want to call `pm_node_destroy` because we want to keep
2129 // around all of its children since we just reused them.
2130 free(target);
2131
2132 return node;
2133}
2134
2139static pm_call_target_node_t *
2140pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
2141 pm_call_target_node_t *node = PM_ALLOC_NODE(parser, pm_call_target_node_t);
2142
2143 *node = (pm_call_target_node_t) {
2144 {
2145 .type = PM_CALL_TARGET_NODE,
2146 .flags = target->base.flags,
2147 .location = target->base.location
2148 },
2149 .receiver = target->receiver,
2150 .call_operator_loc = target->call_operator_loc,
2151 .name = target->name,
2152 .message_loc = target->message_loc
2153 };
2154
2155 // Here we're going to free the target, since it is no longer necessary.
2156 // However, we don't want to call `pm_node_destroy` because we want to keep
2157 // around all of its children since we just reused them.
2158 free(target);
2159
2160 return node;
2161}
2162
2168pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
2169 pm_index_target_node_t *node = PM_ALLOC_NODE(parser, pm_index_target_node_t);
2170
2171 *node = (pm_index_target_node_t) {
2172 {
2173 .type = PM_INDEX_TARGET_NODE,
2174 .flags = target->base.flags,
2175 .location = target->base.location
2176 },
2177 .receiver = target->receiver,
2178 .opening_loc = target->opening_loc,
2179 .arguments = target->arguments,
2180 .closing_loc = target->closing_loc,
2181 .block = target->block
2182 };
2183
2184 // Here we're going to free the target, since it is no longer necessary.
2185 // However, we don't want to call `pm_node_destroy` because we want to keep
2186 // around all of its children since we just reused them.
2187 free(target);
2188
2189 return node;
2190}
2191
2196pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *target, const pm_token_t *operator) {
2197 pm_capture_pattern_node_t *node = PM_ALLOC_NODE(parser, pm_capture_pattern_node_t);
2198
2199 *node = (pm_capture_pattern_node_t) {
2200 {
2202 .location = {
2203 .start = value->location.start,
2204 .end = target->location.end
2205 },
2206 },
2207 .value = value,
2208 .target = target,
2209 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2210 };
2211
2212 return node;
2213}
2214
2218static pm_case_node_t *
2219pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
2220 pm_case_node_t *node = PM_ALLOC_NODE(parser, pm_case_node_t);
2221
2222 *node = (pm_case_node_t) {
2223 {
2224 .type = PM_CASE_NODE,
2225 .location = {
2226 .start = case_keyword->start,
2227 .end = end_keyword->end
2228 },
2229 },
2230 .predicate = predicate,
2231 .consequent = NULL,
2232 .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
2233 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
2234 .conditions = { 0 }
2235 };
2236
2237 return node;
2238}
2239
2243static void
2244pm_case_node_condition_append(pm_case_node_t *node, pm_node_t *condition) {
2245 assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
2246
2247 pm_node_list_append(&node->conditions, condition);
2248 node->base.location.end = condition->location.end;
2249}
2250
2254static void
2255pm_case_node_consequent_set(pm_case_node_t *node, pm_else_node_t *consequent) {
2256 node->consequent = consequent;
2257 node->base.location.end = consequent->base.location.end;
2258}
2259
2263static void
2264pm_case_node_end_keyword_loc_set(pm_case_node_t *node, const pm_token_t *end_keyword) {
2265 node->base.location.end = end_keyword->end;
2266 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
2267}
2268
2272static pm_case_match_node_t *
2273pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
2274 pm_case_match_node_t *node = PM_ALLOC_NODE(parser, pm_case_match_node_t);
2275
2276 *node = (pm_case_match_node_t) {
2277 {
2278 .type = PM_CASE_MATCH_NODE,
2279 .location = {
2280 .start = case_keyword->start,
2281 .end = end_keyword->end
2282 },
2283 },
2284 .predicate = predicate,
2285 .consequent = NULL,
2286 .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
2287 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
2288 .conditions = { 0 }
2289 };
2290
2291 return node;
2292}
2293
2297static void
2298pm_case_match_node_condition_append(pm_case_match_node_t *node, pm_node_t *condition) {
2299 assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
2300
2301 pm_node_list_append(&node->conditions, condition);
2302 node->base.location.end = condition->location.end;
2303}
2304
2308static void
2309pm_case_match_node_consequent_set(pm_case_match_node_t *node, pm_else_node_t *consequent) {
2310 node->consequent = consequent;
2311 node->base.location.end = consequent->base.location.end;
2312}
2313
2317static void
2318pm_case_match_node_end_keyword_loc_set(pm_case_match_node_t *node, const pm_token_t *end_keyword) {
2319 node->base.location.end = end_keyword->end;
2320 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
2321}
2322
2326static pm_class_node_t *
2327pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, pm_node_t *constant_path, const pm_token_t *name, const pm_token_t *inheritance_operator, pm_node_t *superclass, pm_node_t *body, const pm_token_t *end_keyword) {
2328 pm_class_node_t *node = PM_ALLOC_NODE(parser, pm_class_node_t);
2329
2330 *node = (pm_class_node_t) {
2331 {
2332 .type = PM_CLASS_NODE,
2333 .location = { .start = class_keyword->start, .end = end_keyword->end },
2334 },
2335 .locals = *locals,
2336 .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
2337 .constant_path = constant_path,
2338 .inheritance_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(inheritance_operator),
2339 .superclass = superclass,
2340 .body = body,
2341 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
2342 .name = pm_parser_constant_id_token(parser, name)
2343 };
2344
2345 return node;
2346}
2347
2352pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2353 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2355
2357 {
2359 .location = {
2360 .start = target->base.location.start,
2361 .end = value->location.end
2362 }
2363 },
2364 .name = target->name,
2365 .name_loc = target->base.location,
2366 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2367 .value = value
2368 };
2369
2370 return node;
2371}
2372
2377pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2379
2381 {
2383 .location = {
2384 .start = target->base.location.start,
2385 .end = value->location.end
2386 }
2387 },
2388 .name = target->name,
2389 .name_loc = target->base.location,
2390 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2391 .value = value,
2392 .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
2393 };
2394
2395 return node;
2396}
2397
2402pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2403 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
2405
2407 {
2409 .location = {
2410 .start = target->base.location.start,
2411 .end = value->location.end
2412 }
2413 },
2414 .name = target->name,
2415 .name_loc = target->base.location,
2416 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2417 .value = value
2418 };
2419
2420 return node;
2421}
2422
2427pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
2428 assert(token->type == PM_TOKEN_CLASS_VARIABLE);
2429 pm_class_variable_read_node_t *node = PM_ALLOC_NODE(parser, pm_class_variable_read_node_t);
2430
2432 {
2434 .location = PM_LOCATION_TOKEN_VALUE(token)
2435 },
2436 .name = pm_parser_constant_id_token(parser, token)
2437 };
2438
2439 return node;
2440}
2441
2446pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
2448
2450 {
2452 .location = {
2453 .start = read_node->base.location.start,
2454 .end = value->location.end
2455 },
2456 },
2457 .name = read_node->name,
2458 .name_loc = PM_LOCATION_NODE_VALUE((pm_node_t *) read_node),
2459 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
2460 .value = value
2461 };
2462
2463 return node;
2464}
2465
2470pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2471 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2473
2475 {
2477 .location = {
2478 .start = target->base.location.start,
2479 .end = value->location.end
2480 }
2481 },
2482 .target = target,
2483 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2484 .value = value
2485 };
2486
2487 return node;
2488}
2489
2494pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2496
2498 {
2500 .location = {
2501 .start = target->base.location.start,
2502 .end = value->location.end
2503 }
2504 },
2505 .target = target,
2506 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2507 .value = value,
2508 .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
2509 };
2510
2511 return node;
2512}
2513
2518pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2519 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
2521
2523 {
2525 .location = {
2526 .start = target->base.location.start,
2527 .end = value->location.end
2528 }
2529 },
2530 .target = target,
2531 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2532 .value = value
2533 };
2534
2535 return node;
2536}
2537
2542pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, pm_node_t *child) {
2543 pm_assert_value_expression(parser, parent);
2544
2545 pm_constant_path_node_t *node = PM_ALLOC_NODE(parser, pm_constant_path_node_t);
2546
2547 *node = (pm_constant_path_node_t) {
2548 {
2549 .type = PM_CONSTANT_PATH_NODE,
2550 .location = {
2551 .start = parent == NULL ? delimiter->start : parent->location.start,
2552 .end = child->location.end
2553 },
2554 },
2555 .parent = parent,
2556 .child = child,
2557 .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter)
2558 };
2559
2560 return node;
2561}
2562
2567pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2568 pm_constant_path_write_node_t *node = PM_ALLOC_NODE(parser, pm_constant_path_write_node_t);
2569
2571 {
2573 .location = {
2574 .start = target->base.location.start,
2575 .end = value->location.end
2576 },
2577 },
2578 .target = target,
2579 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
2580 .value = value
2581 };
2582
2583 return node;
2584}
2585
2590pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2591 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2592 pm_constant_and_write_node_t *node = PM_ALLOC_NODE(parser, pm_constant_and_write_node_t);
2593
2595 {
2597 .location = {
2598 .start = target->base.location.start,
2599 .end = value->location.end
2600 }
2601 },
2602 .name = target->name,
2603 .name_loc = target->base.location,
2604 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2605 .value = value
2606 };
2607
2608 return node;
2609}
2610
2615pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2617
2619 {
2621 .location = {
2622 .start = target->base.location.start,
2623 .end = value->location.end
2624 }
2625 },
2626 .name = target->name,
2627 .name_loc = target->base.location,
2628 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2629 .value = value,
2630 .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
2631 };
2632
2633 return node;
2634}
2635
2640pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2641 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
2642 pm_constant_or_write_node_t *node = PM_ALLOC_NODE(parser, pm_constant_or_write_node_t);
2643
2644 *node = (pm_constant_or_write_node_t) {
2645 {
2647 .location = {
2648 .start = target->base.location.start,
2649 .end = value->location.end
2650 }
2651 },
2652 .name = target->name,
2653 .name_loc = target->base.location,
2654 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2655 .value = value
2656 };
2657
2658 return node;
2659}
2660
2665pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
2666 assert(name->type == PM_TOKEN_CONSTANT || name->type == PM_TOKEN_MISSING);
2667 pm_constant_read_node_t *node = PM_ALLOC_NODE(parser, pm_constant_read_node_t);
2668
2669 *node = (pm_constant_read_node_t) {
2670 {
2671 .type = PM_CONSTANT_READ_NODE,
2672 .location = PM_LOCATION_TOKEN_VALUE(name)
2673 },
2674 .name = pm_parser_constant_id_token(parser, name)
2675 };
2676
2677 return node;
2678}
2679
2684pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2685 pm_constant_write_node_t *node = PM_ALLOC_NODE(parser, pm_constant_write_node_t);
2686
2687 *node = (pm_constant_write_node_t) {
2688 {
2689 .type = PM_CONSTANT_WRITE_NODE,
2690 .location = {
2691 .start = target->base.location.start,
2692 .end = value->location.end
2693 }
2694 },
2695 .name = target->name,
2696 .name_loc = target->base.location,
2697 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
2698 .value = value
2699 };
2700
2701 return node;
2702}
2703
2707static pm_def_node_t *
2708pm_def_node_create(
2709 pm_parser_t *parser,
2710 const pm_token_t *name,
2711 pm_node_t *receiver,
2712 pm_parameters_node_t *parameters,
2713 pm_node_t *body,
2714 pm_constant_id_list_t *locals,
2715 uint32_t locals_body_index,
2716 const pm_token_t *def_keyword,
2717 const pm_token_t *operator,
2718 const pm_token_t *lparen,
2719 const pm_token_t *rparen,
2720 const pm_token_t *equal,
2721 const pm_token_t *end_keyword
2722) {
2723 pm_def_node_t *node = PM_ALLOC_NODE(parser, pm_def_node_t);
2724 const uint8_t *end;
2725
2726 if (end_keyword->type == PM_TOKEN_NOT_PROVIDED) {
2727 end = body->location.end;
2728 } else {
2729 end = end_keyword->end;
2730 }
2731
2732 *node = (pm_def_node_t) {
2733 {
2734 .type = PM_DEF_NODE,
2735 .location = { .start = def_keyword->start, .end = end },
2736 },
2737 .name = pm_parser_constant_id_token(parser, name),
2738 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
2739 .receiver = receiver,
2740 .parameters = parameters,
2741 .body = body,
2742 .locals = *locals,
2743 .locals_body_index = locals_body_index,
2744 .def_keyword_loc = PM_LOCATION_TOKEN_VALUE(def_keyword),
2745 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
2746 .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
2747 .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
2748 .equal_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(equal),
2749 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
2750 };
2751
2752 return node;
2753}
2754
2758static pm_defined_node_t *
2759pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t *value, const pm_token_t *rparen, const pm_location_t *keyword_loc) {
2760 pm_defined_node_t *node = PM_ALLOC_NODE(parser, pm_defined_node_t);
2761
2762 *node = (pm_defined_node_t) {
2763 {
2764 .type = PM_DEFINED_NODE,
2765 .location = {
2766 .start = keyword_loc->start,
2767 .end = (rparen->type == PM_TOKEN_NOT_PROVIDED ? value->location.end : rparen->end)
2768 },
2769 },
2770 .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
2771 .value = value,
2772 .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
2773 .keyword_loc = *keyword_loc
2774 };
2775
2776 return node;
2777}
2778
2782static pm_else_node_t *
2783pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
2784 pm_else_node_t *node = PM_ALLOC_NODE(parser, pm_else_node_t);
2785 const uint8_t *end = NULL;
2786 if ((end_keyword->type == PM_TOKEN_NOT_PROVIDED) && (statements != NULL)) {
2787 end = statements->base.location.end;
2788 } else {
2789 end = end_keyword->end;
2790 }
2791
2792 *node = (pm_else_node_t) {
2793 {
2794 .type = PM_ELSE_NODE,
2795 .location = {
2796 .start = else_keyword->start,
2797 .end = end,
2798 },
2799 },
2800 .else_keyword_loc = PM_LOCATION_TOKEN_VALUE(else_keyword),
2801 .statements = statements,
2802 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
2803 };
2804
2805 return node;
2806}
2807
2812pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
2813 pm_embedded_statements_node_t *node = PM_ALLOC_NODE(parser, pm_embedded_statements_node_t);
2814
2816 {
2818 .location = {
2819 .start = opening->start,
2820 .end = closing->end
2821 }
2822 },
2823 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2824 .statements = statements,
2825 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
2826 };
2827
2828 return node;
2829}
2830
2835pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
2836 pm_embedded_variable_node_t *node = PM_ALLOC_NODE(parser, pm_embedded_variable_node_t);
2837
2838 *node = (pm_embedded_variable_node_t) {
2839 {
2841 .location = {
2842 .start = operator->start,
2843 .end = variable->location.end
2844 }
2845 },
2846 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2847 .variable = variable
2848 };
2849
2850 return node;
2851}
2852
2856static pm_ensure_node_t *
2857pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
2858 pm_ensure_node_t *node = PM_ALLOC_NODE(parser, pm_ensure_node_t);
2859
2860 *node = (pm_ensure_node_t) {
2861 {
2862 .type = PM_ENSURE_NODE,
2863 .location = {
2864 .start = ensure_keyword->start,
2865 .end = end_keyword->end
2866 },
2867 },
2868 .ensure_keyword_loc = PM_LOCATION_TOKEN_VALUE(ensure_keyword),
2869 .statements = statements,
2870 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
2871 };
2872
2873 return node;
2874}
2875
2879static pm_false_node_t *
2880pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) {
2881 assert(token->type == PM_TOKEN_KEYWORD_FALSE);
2882 pm_false_node_t *node = PM_ALLOC_NODE(parser, pm_false_node_t);
2883
2884 *node = (pm_false_node_t) {{
2885 .type = PM_FALSE_NODE,
2886 .flags = PM_NODE_FLAG_STATIC_LITERAL,
2887 .location = PM_LOCATION_TOKEN_VALUE(token)
2888 }};
2889
2890 return node;
2891}
2892
2898pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) {
2899 pm_find_pattern_node_t *node = PM_ALLOC_NODE(parser, pm_find_pattern_node_t);
2900
2901 pm_node_t *left = nodes->nodes[0];
2902 pm_node_t *right;
2903
2904 if (nodes->size == 1) {
2905 right = (pm_node_t *) pm_missing_node_create(parser, left->location.end, left->location.end);
2906 } else {
2907 right = nodes->nodes[nodes->size - 1];
2908 }
2909
2910 *node = (pm_find_pattern_node_t) {
2911 {
2912 .type = PM_FIND_PATTERN_NODE,
2913 .location = {
2914 .start = left->location.start,
2915 .end = right->location.end,
2916 },
2917 },
2918 .constant = NULL,
2919 .left = left,
2920 .right = right,
2921 .requireds = { 0 },
2922 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2923 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2924 };
2925
2926 // For now we're going to just copy over each pointer manually. This could be
2927 // much more efficient, as we could instead resize the node list to only point
2928 // to 1...-1.
2929 for (size_t index = 1; index < nodes->size - 1; index++) {
2930 pm_node_list_append(&node->requireds, nodes->nodes[index]);
2931 }
2932
2933 return node;
2934}
2935
2939static pm_float_node_t *
2940pm_float_node_create(pm_parser_t *parser, const pm_token_t *token) {
2941 assert(token->type == PM_TOKEN_FLOAT);
2942 pm_float_node_t *node = PM_ALLOC_NODE(parser, pm_float_node_t);
2943
2944 *node = (pm_float_node_t) {{
2945 .type = PM_FLOAT_NODE,
2946 .flags = PM_NODE_FLAG_STATIC_LITERAL,
2947 .location = PM_LOCATION_TOKEN_VALUE(token)
2948 }};
2949
2950 return node;
2951}
2952
2956static pm_imaginary_node_t *
2957pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
2958 assert(token->type == PM_TOKEN_FLOAT_IMAGINARY);
2959
2960 pm_imaginary_node_t *node = PM_ALLOC_NODE(parser, pm_imaginary_node_t);
2961 *node = (pm_imaginary_node_t) {
2962 {
2963 .type = PM_IMAGINARY_NODE,
2964 .flags = PM_NODE_FLAG_STATIC_LITERAL,
2965 .location = PM_LOCATION_TOKEN_VALUE(token)
2966 },
2967 .numeric = (pm_node_t *) pm_float_node_create(parser, &((pm_token_t) {
2968 .type = PM_TOKEN_FLOAT,
2969 .start = token->start,
2970 .end = token->end - 1
2971 }))
2972 };
2973
2974 return node;
2975}
2976
2980static pm_rational_node_t *
2981pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
2982 assert(token->type == PM_TOKEN_FLOAT_RATIONAL);
2983
2984 pm_rational_node_t *node = PM_ALLOC_NODE(parser, pm_rational_node_t);
2985 *node = (pm_rational_node_t) {
2986 {
2987 .type = PM_RATIONAL_NODE,
2988 .flags = PM_NODE_FLAG_STATIC_LITERAL,
2989 .location = PM_LOCATION_TOKEN_VALUE(token)
2990 },
2991 .numeric = (pm_node_t *) pm_float_node_create(parser, &((pm_token_t) {
2992 .type = PM_TOKEN_FLOAT,
2993 .start = token->start,
2994 .end = token->end - 1
2995 }))
2996 };
2997
2998 return node;
2999}
3000
3005static pm_imaginary_node_t *
3006pm_float_node_rational_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
3007 assert(token->type == PM_TOKEN_FLOAT_RATIONAL_IMAGINARY);
3008
3009 pm_imaginary_node_t *node = PM_ALLOC_NODE(parser, pm_imaginary_node_t);
3010 *node = (pm_imaginary_node_t) {
3011 {
3012 .type = PM_IMAGINARY_NODE,
3013 .flags = PM_NODE_FLAG_STATIC_LITERAL,
3014 .location = PM_LOCATION_TOKEN_VALUE(token)
3015 },
3016 .numeric = (pm_node_t *) pm_float_node_rational_create(parser, &((pm_token_t) {
3018 .start = token->start,
3019 .end = token->end - 1
3020 }))
3021 };
3022
3023 return node;
3024}
3025
3029static pm_for_node_t *
3030pm_for_node_create(
3031 pm_parser_t *parser,
3032 pm_node_t *index,
3033 pm_node_t *collection,
3034 pm_statements_node_t *statements,
3035 const pm_token_t *for_keyword,
3036 const pm_token_t *in_keyword,
3037 const pm_token_t *do_keyword,
3038 const pm_token_t *end_keyword
3039) {
3040 pm_for_node_t *node = PM_ALLOC_NODE(parser, pm_for_node_t);
3041
3042 *node = (pm_for_node_t) {
3043 {
3044 .type = PM_FOR_NODE,
3045 .location = {
3046 .start = for_keyword->start,
3047 .end = end_keyword->end
3048 },
3049 },
3050 .index = index,
3051 .collection = collection,
3052 .statements = statements,
3053 .for_keyword_loc = PM_LOCATION_TOKEN_VALUE(for_keyword),
3054 .in_keyword_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
3055 .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
3056 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
3057 };
3058
3059 return node;
3060}
3061
3066pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token) {
3067 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
3069 *node = (pm_forwarding_arguments_node_t) {{ .type = PM_FORWARDING_ARGUMENTS_NODE, .location = PM_LOCATION_TOKEN_VALUE(token) }};
3070 return node;
3071}
3072
3077pm_forwarding_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
3078 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
3080 *node = (pm_forwarding_parameter_node_t) {{ .type = PM_FORWARDING_PARAMETER_NODE, .location = PM_LOCATION_TOKEN_VALUE(token) }};
3081 return node;
3082}
3083
3088pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm_arguments_t *arguments) {
3089 assert(arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_NODE));
3090 assert(token->type == PM_TOKEN_KEYWORD_SUPER);
3091 pm_forwarding_super_node_t *node = PM_ALLOC_NODE(parser, pm_forwarding_super_node_t);
3092
3093 pm_block_node_t *block = NULL;
3094 if (arguments->block != NULL) {
3095 block = (pm_block_node_t *) arguments->block;
3096 }
3097
3098 *node = (pm_forwarding_super_node_t) {
3099 {
3101 .location = {
3102 .start = token->start,
3103 .end = block != NULL ? block->base.location.end : token->end
3104 },
3105 },
3106 .block = block
3107 };
3108
3109 return node;
3110}
3111
3117pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
3118 pm_hash_pattern_node_t *node = PM_ALLOC_NODE(parser, pm_hash_pattern_node_t);
3119
3120 *node = (pm_hash_pattern_node_t) {
3121 {
3122 .type = PM_HASH_PATTERN_NODE,
3123 .location = {
3124 .start = opening->start,
3125 .end = closing->end
3126 },
3127 },
3128 .constant = NULL,
3129 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
3130 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
3131 .elements = { 0 },
3132 .rest = NULL
3133 };
3134
3135 return node;
3136}
3137
3142pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *elements, pm_node_t *rest) {
3143 pm_hash_pattern_node_t *node = PM_ALLOC_NODE(parser, pm_hash_pattern_node_t);
3144
3145 const uint8_t *start;
3146 const uint8_t *end;
3147
3148 if (elements->size > 0) {
3149 if (rest) {
3150 start = elements->nodes[0]->location.start;
3151 end = rest->location.end;
3152 } else {
3153 start = elements->nodes[0]->location.start;
3154 end = elements->nodes[elements->size - 1]->location.end;
3155 }
3156 } else {
3157 assert(rest != NULL);
3158 start = rest->location.start;
3159 end = rest->location.end;
3160 }
3161
3162 *node = (pm_hash_pattern_node_t) {
3163 {
3164 .type = PM_HASH_PATTERN_NODE,
3165 .location = {
3166 .start = start,
3167 .end = end
3168 },
3169 },
3170 .constant = NULL,
3171 .elements = { 0 },
3172 .rest = rest,
3173 .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
3174 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
3175 };
3176
3177 for (size_t index = 0; index < elements->size; index++) {
3178 pm_node_t *element = elements->nodes[index];
3179 pm_node_list_append(&node->elements, element);
3180 }
3181
3182 return node;
3183}
3184
3188static pm_constant_id_t
3189pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) {
3190 switch (PM_NODE_TYPE(target)) {
3192 return ((pm_global_variable_read_node_t *) target)->name;
3194 return ((pm_back_reference_read_node_t *) target)->name;
3196 // This will only ever happen in the event of a syntax error, but we
3197 // still need to provide something for the node.
3198 return pm_parser_constant_id_location(parser, target->location.start, target->location.end);
3199 default:
3200 assert(false && "unreachable");
3201 return (pm_constant_id_t) -1;
3202 }
3203}
3204
3209pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3210 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3212
3214 {
3216 .location = {
3217 .start = target->location.start,
3218 .end = value->location.end
3219 }
3220 },
3221 .name = pm_global_variable_write_name(parser, target),
3222 .name_loc = target->location,
3223 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3224 .value = value
3225 };
3226
3227 return node;
3228}
3229
3234pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3236
3238 {
3240 .location = {
3241 .start = target->location.start,
3242 .end = value->location.end
3243 }
3244 },
3245 .name = pm_global_variable_write_name(parser, target),
3246 .name_loc = target->location,
3247 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3248 .value = value,
3249 .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3250 };
3251
3252 return node;
3253}
3254
3259pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3260 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3262
3264 {
3266 .location = {
3267 .start = target->location.start,
3268 .end = value->location.end
3269 }
3270 },
3271 .name = pm_global_variable_write_name(parser, target),
3272 .name_loc = target->location,
3273 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3274 .value = value
3275 };
3276
3277 return node;
3278}
3279
3284pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
3286
3288 {
3290 .location = PM_LOCATION_TOKEN_VALUE(name),
3291 },
3292 .name = pm_parser_constant_id_token(parser, name)
3293 };
3294
3295 return node;
3296}
3297
3302pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3304
3306 {
3308 .location = {
3309 .start = target->location.start,
3310 .end = value->location.end
3311 },
3312 },
3313 .name = pm_global_variable_write_name(parser, target),
3314 .name_loc = PM_LOCATION_NODE_VALUE(target),
3315 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3316 .value = value
3317 };
3318
3319 return node;
3320}
3321
3325static pm_hash_node_t *
3326pm_hash_node_create(pm_parser_t *parser, const pm_token_t *opening) {
3327 assert(opening != NULL);
3328 pm_hash_node_t *node = PM_ALLOC_NODE(parser, pm_hash_node_t);
3329
3330 *node = (pm_hash_node_t) {
3331 {
3332 .type = PM_HASH_NODE,
3333 .flags = PM_NODE_FLAG_STATIC_LITERAL,
3334 .location = PM_LOCATION_TOKEN_VALUE(opening)
3335 },
3336 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
3337 .closing_loc = PM_LOCATION_NULL_VALUE(parser),
3338 .elements = { 0 }
3339 };
3340
3341 return node;
3342}
3343
3347static inline void
3348pm_hash_node_elements_append(pm_hash_node_t *hash, pm_node_t *element) {
3349 pm_node_list_append(&hash->elements, element);
3350
3351 bool static_literal = PM_NODE_TYPE_P(element, PM_ASSOC_NODE);
3352 if (static_literal) {
3353 pm_assoc_node_t *assoc = (pm_assoc_node_t *) element;
3354 static_literal = !PM_NODE_TYPE_P(assoc->key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_HASH_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_RANGE_NODE);
3355 static_literal = static_literal && PM_NODE_FLAG_P(assoc->key, PM_NODE_FLAG_STATIC_LITERAL);
3356 static_literal = static_literal && PM_NODE_FLAG_P(assoc, PM_NODE_FLAG_STATIC_LITERAL);
3357 }
3358
3359 if (!static_literal) {
3360 pm_node_flag_unset((pm_node_t *)hash, PM_NODE_FLAG_STATIC_LITERAL);
3361 }
3362}
3363
3364static inline void
3365pm_hash_node_closing_loc_set(pm_hash_node_t *hash, pm_token_t *token) {
3366 hash->base.location.end = token->end;
3367 hash->closing_loc = PM_LOCATION_TOKEN_VALUE(token);
3368}
3369
3373static pm_if_node_t *
3374pm_if_node_create(pm_parser_t *parser,
3375 const pm_token_t *if_keyword,
3376 pm_node_t *predicate,
3377 const pm_token_t *then_keyword,
3378 pm_statements_node_t *statements,
3379 pm_node_t *consequent,
3380 const pm_token_t *end_keyword
3381) {
3382 pm_conditional_predicate(predicate);
3383 pm_if_node_t *node = PM_ALLOC_NODE(parser, pm_if_node_t);
3384
3385 const uint8_t *end;
3386 if (end_keyword->type != PM_TOKEN_NOT_PROVIDED) {
3387 end = end_keyword->end;
3388 } else if (consequent != NULL) {
3389 end = consequent->location.end;
3390 } else if (pm_statements_node_body_length(statements) != 0) {
3391 end = statements->base.location.end;
3392 } else {
3393 end = predicate->location.end;
3394 }
3395
3396 *node = (pm_if_node_t) {
3397 {
3398 .type = PM_IF_NODE,
3399 .flags = PM_NODE_FLAG_NEWLINE,
3400 .location = {
3401 .start = if_keyword->start,
3402 .end = end
3403 },
3404 },
3405 .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
3406 .predicate = predicate,
3407 .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
3408 .statements = statements,
3409 .consequent = consequent,
3410 .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
3411 };
3412
3413 return node;
3414}
3415
3419static pm_if_node_t *
3420pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *if_keyword, pm_node_t *predicate) {
3421 pm_conditional_predicate(predicate);
3422 pm_if_node_t *node = PM_ALLOC_NODE(parser, pm_if_node_t);
3423
3424 pm_statements_node_t *statements = pm_statements_node_create(parser);
3425 pm_statements_node_body_append(statements, statement);
3426
3427 *node = (pm_if_node_t) {
3428 {
3429 .type = PM_IF_NODE,
3430 .flags = PM_NODE_FLAG_NEWLINE,
3431 .location = {
3432 .start = statement->location.start,
3433 .end = predicate->location.end
3434 },
3435 },
3436 .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
3437 .predicate = predicate,
3438 .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
3439 .statements = statements,
3440 .consequent = NULL,
3441 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
3442 };
3443
3444 return node;
3445}
3446
3450static pm_if_node_t *
3451pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_token_t *qmark, pm_node_t *true_expression, const pm_token_t *colon, pm_node_t *false_expression) {
3452 pm_assert_value_expression(parser, predicate);
3453 pm_conditional_predicate(predicate);
3454
3455 pm_statements_node_t *if_statements = pm_statements_node_create(parser);
3456 pm_statements_node_body_append(if_statements, true_expression);
3457
3458 pm_statements_node_t *else_statements = pm_statements_node_create(parser);
3459 pm_statements_node_body_append(else_statements, false_expression);
3460
3461 pm_token_t end_keyword = not_provided(parser);
3462 pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, &end_keyword);
3463
3464 pm_if_node_t *node = PM_ALLOC_NODE(parser, pm_if_node_t);
3465
3466 *node = (pm_if_node_t) {
3467 {
3468 .type = PM_IF_NODE,
3469 .flags = PM_NODE_FLAG_NEWLINE,
3470 .location = {
3471 .start = predicate->location.start,
3472 .end = false_expression->location.end,
3473 },
3474 },
3475 .if_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
3476 .predicate = predicate,
3477 .then_keyword_loc = PM_LOCATION_TOKEN_VALUE(qmark),
3478 .statements = if_statements,
3479 .consequent = (pm_node_t *)else_node,
3480 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
3481 };
3482
3483 return node;
3484
3485}
3486
3487static inline void
3488pm_if_node_end_keyword_loc_set(pm_if_node_t *node, const pm_token_t *keyword) {
3489 node->base.location.end = keyword->end;
3490 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
3491}
3492
3493static inline void
3494pm_else_node_end_keyword_loc_set(pm_else_node_t *node, const pm_token_t *keyword) {
3495 node->base.location.end = keyword->end;
3496 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
3497}
3498
3502static pm_implicit_node_t *
3503pm_implicit_node_create(pm_parser_t *parser, pm_node_t *value) {
3504 pm_implicit_node_t *node = PM_ALLOC_NODE(parser, pm_implicit_node_t);
3505
3506 *node = (pm_implicit_node_t) {
3507 {
3508 .type = PM_IMPLICIT_NODE,
3509 .location = value->location
3510 },
3511 .value = value
3512 };
3513
3514 return node;
3515}
3516
3521pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) {
3522 assert(token->type == PM_TOKEN_COMMA);
3523
3524 pm_implicit_rest_node_t *node = PM_ALLOC_NODE(parser, pm_implicit_rest_node_t);
3525
3526 *node = (pm_implicit_rest_node_t) {
3527 {
3528 .type = PM_IMPLICIT_REST_NODE,
3529 .location = PM_LOCATION_TOKEN_VALUE(token)
3530 }
3531 };
3532
3533 return node;
3534}
3535
3539static pm_integer_node_t *
3540pm_integer_node_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
3541 assert(token->type == PM_TOKEN_INTEGER);
3542 pm_integer_node_t *node = PM_ALLOC_NODE(parser, pm_integer_node_t);
3543
3544 *node = (pm_integer_node_t) {{
3545 .type = PM_INTEGER_NODE,
3546 .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
3547 .location = PM_LOCATION_TOKEN_VALUE(token)
3548 }};
3549
3550 return node;
3551}
3552
3557static pm_imaginary_node_t *
3558pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
3559 assert(token->type == PM_TOKEN_INTEGER_IMAGINARY);
3560
3561 pm_imaginary_node_t *node = PM_ALLOC_NODE(parser, pm_imaginary_node_t);
3562 *node = (pm_imaginary_node_t) {
3563 {
3564 .type = PM_IMAGINARY_NODE,
3565 .flags = PM_NODE_FLAG_STATIC_LITERAL,
3566 .location = PM_LOCATION_TOKEN_VALUE(token)
3567 },
3568 .numeric = (pm_node_t *) pm_integer_node_create(parser, base, &((pm_token_t) {
3569 .type = PM_TOKEN_INTEGER,
3570 .start = token->start,
3571 .end = token->end - 1
3572 }))
3573 };
3574
3575 return node;
3576}
3577
3582static pm_rational_node_t *
3583pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
3584 assert(token->type == PM_TOKEN_INTEGER_RATIONAL);
3585
3586 pm_rational_node_t *node = PM_ALLOC_NODE(parser, pm_rational_node_t);
3587 *node = (pm_rational_node_t) {
3588 {
3589 .type = PM_RATIONAL_NODE,
3590 .flags = PM_NODE_FLAG_STATIC_LITERAL,
3591 .location = PM_LOCATION_TOKEN_VALUE(token)
3592 },
3593 .numeric = (pm_node_t *) pm_integer_node_create(parser, base, &((pm_token_t) {
3594 .type = PM_TOKEN_INTEGER,
3595 .start = token->start,
3596 .end = token->end - 1
3597 }))
3598 };
3599
3600 return node;
3601}
3602
3607static pm_imaginary_node_t *
3608pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
3609 assert(token->type == PM_TOKEN_INTEGER_RATIONAL_IMAGINARY);
3610
3611 pm_imaginary_node_t *node = PM_ALLOC_NODE(parser, pm_imaginary_node_t);
3612 *node = (pm_imaginary_node_t) {
3613 {
3614 .type = PM_IMAGINARY_NODE,
3615 .flags = PM_NODE_FLAG_STATIC_LITERAL,
3616 .location = PM_LOCATION_TOKEN_VALUE(token)
3617 },
3618 .numeric = (pm_node_t *) pm_integer_node_rational_create(parser, base, &((pm_token_t) {
3620 .start = token->start,
3621 .end = token->end - 1
3622 }))
3623 };
3624
3625 return node;
3626}
3627
3631static pm_in_node_t *
3632pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t *statements, const pm_token_t *in_keyword, const pm_token_t *then_keyword) {
3633 pm_in_node_t *node = PM_ALLOC_NODE(parser, pm_in_node_t);
3634
3635 const uint8_t *end;
3636 if (statements != NULL) {
3637 end = statements->base.location.end;
3638 } else if (then_keyword->type != PM_TOKEN_NOT_PROVIDED) {
3639 end = then_keyword->end;
3640 } else {
3641 end = pattern->location.end;
3642 }
3643
3644 *node = (pm_in_node_t) {
3645 {
3646 .type = PM_IN_NODE,
3647 .location = {
3648 .start = in_keyword->start,
3649 .end = end
3650 },
3651 },
3652 .pattern = pattern,
3653 .statements = statements,
3654 .in_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
3655 .then_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword)
3656 };
3657
3658 return node;
3659}
3660
3665pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3666 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3668
3670 {
3672 .location = {
3673 .start = target->base.location.start,
3674 .end = value->location.end
3675 }
3676 },
3677 .name = target->name,
3678 .name_loc = target->base.location,
3679 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3680 .value = value
3681 };
3682
3683 return node;
3684}
3685
3690pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3692
3694 {
3696 .location = {
3697 .start = target->base.location.start,
3698 .end = value->location.end
3699 }
3700 },
3701 .name = target->name,
3702 .name_loc = target->base.location,
3703 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3704 .value = value,
3705 .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3706 };
3707
3708 return node;
3709}
3710
3715pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3716 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3718
3720 {
3722 .location = {
3723 .start = target->base.location.start,
3724 .end = value->location.end
3725 }
3726 },
3727 .name = target->name,
3728 .name_loc = target->base.location,
3729 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3730 .value = value
3731 };
3732
3733 return node;
3734}
3735
3740pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
3741 assert(token->type == PM_TOKEN_INSTANCE_VARIABLE);
3743
3745 {
3747 .location = PM_LOCATION_TOKEN_VALUE(token)
3748 },
3749 .name = pm_parser_constant_id_token(parser, token)
3750 };
3751
3752 return node;
3753}
3754
3760pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
3763 {
3765 .location = {
3766 .start = read_node->base.location.start,
3767 .end = value->location.end
3768 }
3769 },
3770 .name = read_node->name,
3771 .name_loc = PM_LOCATION_NODE_BASE_VALUE(read_node),
3772 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3773 .value = value
3774 };
3775
3776 return node;
3777}
3778
3783pm_interpolated_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening) {
3785
3787 {
3789 .location = {
3790 .start = opening->start,
3791 .end = NULL,
3792 },
3793 },
3794 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
3795 .closing_loc = PM_LOCATION_TOKEN_VALUE(opening),
3796 .parts = { 0 }
3797 };
3798
3799 return node;
3800}
3801
3802static inline void
3803pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expression_node_t *node, pm_node_t *part) {
3804 if (node->base.location.start > part->location.start) {
3805 node->base.location.start = part->location.start;
3806 }
3807 if (node->base.location.end < part->location.end) {
3808 node->base.location.end = part->location.end;
3809 }
3810 pm_node_list_append(&node->parts, part);
3811}
3812
3813static inline void
3814pm_interpolated_regular_expression_node_closing_set(pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
3815 node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
3816 node->base.location.end = closing->end;
3817 pm_node_flag_set((pm_node_t *)node, pm_regular_expression_flags_create(closing));
3818}
3819
3824pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
3825 pm_interpolated_string_node_t *node = PM_ALLOC_NODE(parser, pm_interpolated_string_node_t);
3826
3828 {
3830 .location = {
3831 .start = opening->start,
3832 .end = closing->end,
3833 },
3834 },
3835 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
3836 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
3837 .parts = { 0 }
3838 };
3839
3840 if (parts != NULL) {
3841 node->parts = *parts;
3842 }
3843
3844 return node;
3845}
3846
3850static inline void
3851pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_t *part) {
3852 if (node->parts.size == 0 && node->opening_loc.start == NULL) {
3853 node->base.location.start = part->location.start;
3854 }
3855
3856 pm_node_list_append(&node->parts, part);
3857 node->base.location.end = part->location.end;
3858}
3859
3863static void
3864pm_interpolated_string_node_closing_set(pm_interpolated_string_node_t *node, const pm_token_t *closing) {
3865 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
3866 node->base.location.end = closing->end;
3867}
3868
3873pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
3874 pm_interpolated_symbol_node_t *node = PM_ALLOC_NODE(parser, pm_interpolated_symbol_node_t);
3875
3877 {
3879 .location = {
3880 .start = opening->start,
3881 .end = closing->end,
3882 },
3883 },
3884 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
3885 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
3886 .parts = { 0 }
3887 };
3888
3889 if (parts != NULL) {
3890 node->parts = *parts;
3891 }
3892
3893 return node;
3894}
3895
3896static inline void
3897pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_t *part) {
3898 if (node->parts.size == 0 && node->opening_loc.start == NULL) {
3899 node->base.location.start = part->location.start;
3900 }
3901
3902 pm_node_list_append(&node->parts, part);
3903 node->base.location.end = part->location.end;
3904}
3905
3910pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
3912
3914 {
3916 .location = {
3917 .start = opening->start,
3918 .end = closing->end
3919 },
3920 },
3921 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
3922 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
3923 .parts = { 0 }
3924 };
3925
3926 return node;
3927}
3928
3929static inline void
3930pm_interpolated_xstring_node_append(pm_interpolated_x_string_node_t *node, pm_node_t *part) {
3931 pm_node_list_append(&node->parts, part);
3932 node->base.location.end = part->location.end;
3933}
3934
3935static inline void
3936pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node, const pm_token_t *closing) {
3937 node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
3938 node->base.location.end = closing->end;
3939}
3940
3945pm_keyword_hash_node_create(pm_parser_t *parser) {
3946 pm_keyword_hash_node_t *node = PM_ALLOC_NODE(parser, pm_keyword_hash_node_t);
3947
3948 *node = (pm_keyword_hash_node_t) {
3949 .base = {
3950 .type = PM_KEYWORD_HASH_NODE,
3951 .location = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
3953 },
3954 .elements = { 0 }
3955 };
3956
3957 return node;
3958}
3959
3963static void
3964pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *element) {
3965 // If the element being added is not an AssocNode or does not have a symbol key, then
3966 // we want to turn the STATIC_KEYS flag off.
3967 // TODO: Rename the flag to SYMBOL_KEYS instead.
3968 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
3969 pm_node_flag_unset((pm_node_t *)hash, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
3970 }
3971
3972 pm_node_list_append(&hash->elements, element);
3973 if (hash->base.location.start == NULL) {
3974 hash->base.location.start = element->location.start;
3975 }
3976 hash->base.location.end = element->location.end;
3977}
3978
3983pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name) {
3985
3987 {
3989 .location = {
3990 .start = name->start,
3991 .end = name->end
3992 },
3993 },
3994 .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
3995 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
3996 };
3997
3998 return node;
3999}
4000
4005pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, pm_node_t *value) {
4007
4009 {
4011 .location = {
4012 .start = name->start,
4013 .end = value->location.end
4014 },
4015 },
4016 .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
4017 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
4018 .value = value
4019 };
4020
4021 return node;
4022}
4023
4028pm_keyword_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
4030
4032 {
4034 .location = {
4035 .start = operator->start,
4036 .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
4037 },
4038 },
4039 .name = pm_parser_optional_constant_id_token(parser, name),
4040 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
4041 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
4042 };
4043
4044 return node;
4045}
4046
4050static pm_lambda_node_t *
4051pm_lambda_node_create(
4052 pm_parser_t *parser,
4053 pm_constant_id_list_t *locals,
4054 uint32_t locals_body_index,
4055 const pm_token_t *operator,
4056 const pm_token_t *opening,
4057 const pm_token_t *closing,
4058 pm_node_t *parameters,
4059 pm_node_t *body
4060) {
4061 pm_lambda_node_t *node = PM_ALLOC_NODE(parser, pm_lambda_node_t);
4062
4063 *node = (pm_lambda_node_t) {
4064 {
4065 .type = PM_LAMBDA_NODE,
4066 .location = {
4067 .start = operator->start,
4068 .end = closing->end
4069 },
4070 },
4071 .locals = *locals,
4072 .locals_body_index = locals_body_index,
4073 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4074 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4075 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
4076 .parameters = parameters,
4077 .body = body
4078 };
4079
4080 return node;
4081}
4082
4087pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
4089 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
4091
4093 {
4095 .location = {
4096 .start = target->location.start,
4097 .end = value->location.end
4098 }
4099 },
4100 .name_loc = target->location,
4101 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4102 .value = value,
4103 .name = name,
4104 .depth = depth
4105 };
4106
4107 return node;
4108}
4109
4114pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
4116
4118 {
4120 .location = {
4121 .start = target->location.start,
4122 .end = value->location.end
4123 }
4124 },
4125 .name_loc = target->location,
4126 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4127 .value = value,
4128 .name = name,
4129 .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
4130 .depth = depth
4131 };
4132
4133 return node;
4134}
4135
4140pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
4142 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
4144
4146 {
4148 .location = {
4149 .start = target->location.start,
4150 .end = value->location.end
4151 }
4152 },
4153 .name_loc = target->location,
4154 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4155 .value = value,
4156 .name = name,
4157 .depth = depth
4158 };
4159
4160 return node;
4161}
4162
4167pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
4168 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
4169
4170 if (parser->current_param_name == name_id) {
4171 pm_parser_err_token(parser, name, PM_ERR_PARAMETER_CIRCULAR);
4172 }
4173
4174 pm_local_variable_read_node_t *node = PM_ALLOC_NODE(parser, pm_local_variable_read_node_t);
4175
4177 {
4179 .location = PM_LOCATION_TOKEN_VALUE(name)
4180 },
4181 .name = name_id,
4182 .depth = depth
4183 };
4184
4185 return node;
4186}
4187
4192pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name, uint32_t depth, pm_node_t *value, const pm_location_t *name_loc, const pm_token_t *operator) {
4194
4196 {
4198 .location = {
4199 .start = name_loc->start,
4200 .end = value->location.end
4201 }
4202 },
4203 .name = name,
4204 .depth = depth,
4205 .value = value,
4206 .name_loc = *name_loc,
4207 .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator)
4208 };
4209
4210 return node;
4211}
4212
/**
 * Returns true if the given byte range has the shape of a numbered parameter:
 * exactly two bytes, an underscore followed by a decimal digit other than 0.
 */
static inline bool
pm_token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
    if (end - start != 2) {
        return false;
    }
    if (start[0] != '_') {
        return false;
    }
    if (start[1] == '0') {
        return false;
    }
    return pm_char_is_decimal_digit(start[1]);
}
4221
4226static inline void
4227pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
4228 if (pm_token_is_numbered_parameter(start, end)) {
4229 PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_PARAMETER_NUMBERED_RESERVED, start);
4230 }
4231}
4232
4238pm_local_variable_target_node_create_values(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) {
4240
4242 {
4244 .location = *location
4245 },
4246 .name = name,
4247 .depth = depth
4248 };
4249
4250 return node;
4251}
4252
4257pm_local_variable_target_node_create(pm_parser_t *parser, const pm_token_t *name) {
4258 pm_refute_numbered_parameter(parser, name->start, name->end);
4259
4260 return pm_local_variable_target_node_create_values(
4261 parser,
4262 &(pm_location_t) { .start = name->start, .end = name->end },
4263 pm_parser_constant_id_token(parser, name),
4264 0
4265 );
4266}
4267
4272pm_local_variable_target_node_create_depth(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
4273 pm_refute_numbered_parameter(parser, name->start, name->end);
4274
4275 return pm_local_variable_target_node_create_values(
4276 parser,
4277 &(pm_location_t) { .start = name->start, .end = name->end },
4278 pm_parser_constant_id_token(parser, name),
4279 depth
4280 );
4281}
4282
4287pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
4288 pm_assert_value_expression(parser, value);
4289
4290 pm_match_predicate_node_t *node = PM_ALLOC_NODE(parser, pm_match_predicate_node_t);
4291
4292 *node = (pm_match_predicate_node_t) {
4293 {
4295 .location = {
4296 .start = value->location.start,
4297 .end = pattern->location.end
4298 }
4299 },
4300 .value = value,
4301 .pattern = pattern,
4302 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
4303 };
4304
4305 return node;
4306}
4307
4312pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
4313 pm_assert_value_expression(parser, value);
4314
4315 pm_match_required_node_t *node = PM_ALLOC_NODE(parser, pm_match_required_node_t);
4316
4317 *node = (pm_match_required_node_t) {
4318 {
4319 .type = PM_MATCH_REQUIRED_NODE,
4320 .location = {
4321 .start = value->location.start,
4322 .end = pattern->location.end
4323 }
4324 },
4325 .value = value,
4326 .pattern = pattern,
4327 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
4328 };
4329
4330 return node;
4331}
4332
4336static pm_match_write_node_t *
4337pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
4338 pm_match_write_node_t *node = PM_ALLOC_NODE(parser, pm_match_write_node_t);
4339
4340 *node = (pm_match_write_node_t) {
4341 {
4342 .type = PM_MATCH_WRITE_NODE,
4343 .location = call->base.location
4344 },
4345 .call = call,
4346 .targets = { 0 }
4347 };
4348
4349 return node;
4350}
4351
4355static pm_module_node_t *
4356pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *module_keyword, pm_node_t *constant_path, const pm_token_t *name, pm_node_t *body, const pm_token_t *end_keyword) {
4357 pm_module_node_t *node = PM_ALLOC_NODE(parser, pm_module_node_t);
4358
4359 *node = (pm_module_node_t) {
4360 {
4361 .type = PM_MODULE_NODE,
4362 .location = {
4363 .start = module_keyword->start,
4364 .end = end_keyword->end
4365 }
4366 },
4367 .locals = (locals == NULL ? ((pm_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals),
4368 .module_keyword_loc = PM_LOCATION_TOKEN_VALUE(module_keyword),
4369 .constant_path = constant_path,
4370 .body = body,
4371 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
4372 .name = pm_parser_constant_id_token(parser, name)
4373 };
4374
4375 return node;
4376}
4377
4382pm_multi_target_node_create(pm_parser_t *parser) {
4383 pm_multi_target_node_t *node = PM_ALLOC_NODE(parser, pm_multi_target_node_t);
4384
4385 *node = (pm_multi_target_node_t) {
4386 {
4387 .type = PM_MULTI_TARGET_NODE,
4388 .location = { .start = NULL, .end = NULL }
4389 },
4390 .lefts = { 0 },
4391 .rest = NULL,
4392 .rights = { 0 },
4393 .lparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4394 .rparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4395 };
4396
4397 return node;
4398}
4399
4403static void
4404pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
4406 if (node->rest == NULL) {
4407 node->rest = target;
4408 } else {
4409 pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
4410 pm_node_list_append(&node->rights, target);
4411 }
4412 } else if (node->rest == NULL) {
4413 pm_node_list_append(&node->lefts, target);
4414 } else {
4415 pm_node_list_append(&node->rights, target);
4416 }
4417
4418 if (node->base.location.start == NULL || (node->base.location.start > target->location.start)) {
4419 node->base.location.start = target->location.start;
4420 }
4421
4422 if (node->base.location.end == NULL || (node->base.location.end < target->location.end)) {
4423 node->base.location.end = target->location.end;
4424 }
4425}
4426
4430static void
4431pm_multi_target_node_opening_set(pm_multi_target_node_t *node, const pm_token_t *lparen) {
4432 node->base.location.start = lparen->start;
4433 node->lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen);
4434}
4435
4439static void
4440pm_multi_target_node_closing_set(pm_multi_target_node_t *node, const pm_token_t *rparen) {
4441 node->base.location.end = rparen->end;
4442 node->rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen);
4443}
4444
4448static pm_multi_write_node_t *
4449pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4450 pm_multi_write_node_t *node = PM_ALLOC_NODE(parser, pm_multi_write_node_t);
4451
4452 *node = (pm_multi_write_node_t) {
4453 {
4454 .type = PM_MULTI_WRITE_NODE,
4455 .location = {
4456 .start = target->base.location.start,
4457 .end = value->location.end
4458 }
4459 },
4460 .lefts = target->lefts,
4461 .rest = target->rest,
4462 .rights = target->rights,
4463 .lparen_loc = target->lparen_loc,
4464 .rparen_loc = target->rparen_loc,
4465 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4466 .value = value
4467 };
4468
4469 // Explicitly do not call pm_node_destroy here because we want to keep
4470 // around all of the information within the MultiWriteNode node.
4471 free(target);
4472
4473 return node;
4474}
4475
4479static pm_next_node_t *
4480pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
4481 assert(keyword->type == PM_TOKEN_KEYWORD_NEXT);
4482 pm_next_node_t *node = PM_ALLOC_NODE(parser, pm_next_node_t);
4483
4484 *node = (pm_next_node_t) {
4485 {
4486 .type = PM_NEXT_NODE,
4487 .location = {
4488 .start = keyword->start,
4489 .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
4490 }
4491 },
4492 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
4493 .arguments = arguments
4494 };
4495
4496 return node;
4497}
4498
4502static pm_nil_node_t *
4503pm_nil_node_create(pm_parser_t *parser, const pm_token_t *token) {
4504 assert(token->type == PM_TOKEN_KEYWORD_NIL);
4505 pm_nil_node_t *node = PM_ALLOC_NODE(parser, pm_nil_node_t);
4506
4507 *node = (pm_nil_node_t) {{
4508 .type = PM_NIL_NODE,
4509 .flags = PM_NODE_FLAG_STATIC_LITERAL,
4510 .location = PM_LOCATION_TOKEN_VALUE(token)
4511 }};
4512
4513 return node;
4514}
4515
4520pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
4521 assert(operator->type == PM_TOKEN_USTAR_STAR || operator->type == PM_TOKEN_STAR_STAR);
4522 assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
4524
4526 {
4528 .location = {
4529 .start = operator->start,
4530 .end = keyword->end
4531 }
4532 },
4533 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4534 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
4535 };
4536
4537 return node;
4538}
4539
4544pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_location_t *location, uint8_t maximum) {
4545 pm_numbered_parameters_node_t *node = PM_ALLOC_NODE(parser, pm_numbered_parameters_node_t);
4546
4548 {
4550 .location = *location
4551 },
4552 .maximum = maximum
4553 };
4554
4555 return node;
4556}
4557
4562pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
4563 assert(name->type == PM_TOKEN_NUMBERED_REFERENCE);
4565
4567 {
4569 .location = PM_LOCATION_TOKEN_VALUE(name),
4570 },
4571 .number = parse_decimal_number(parser, name->start + 1, name->end)
4572 };
4573
4574 return node;
4575}
4576
4581pm_optional_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator, pm_node_t *value) {
4582 pm_optional_parameter_node_t *node = PM_ALLOC_NODE(parser, pm_optional_parameter_node_t);
4583
4585 {
4587 .location = {
4588 .start = name->start,
4589 .end = value->location.end
4590 }
4591 },
4592 .name = pm_parser_constant_id_token(parser, name),
4593 .name_loc = PM_LOCATION_TOKEN_VALUE(name),
4594 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4595 .value = value
4596 };
4597
4598 return node;
4599}
4600
4604static pm_or_node_t *
4605pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
4606 pm_assert_value_expression(parser, left);
4607
4608 pm_or_node_t *node = PM_ALLOC_NODE(parser, pm_or_node_t);
4609
4610 *node = (pm_or_node_t) {
4611 {
4612 .type = PM_OR_NODE,
4613 .location = {
4614 .start = left->location.start,
4615 .end = right->location.end
4616 }
4617 },
4618 .left = left,
4619 .right = right,
4620 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
4621 };
4622
4623 return node;
4624}
4625
4629static pm_parameters_node_t *
4630pm_parameters_node_create(pm_parser_t *parser) {
4631 pm_parameters_node_t *node = PM_ALLOC_NODE(parser, pm_parameters_node_t);
4632
4633 *node = (pm_parameters_node_t) {
4634 {
4635 .type = PM_PARAMETERS_NODE,
4636 .location = PM_LOCATION_TOKEN_VALUE(&parser->current)
4637 },
4638 .rest = NULL,
4639 .keyword_rest = NULL,
4640 .block = NULL,
4641 .requireds = { 0 },
4642 .optionals = { 0 },
4643 .posts = { 0 },
4644 .keywords = { 0 }
4645 };
4646
4647 return node;
4648}
4649
4653static void
4654pm_parameters_node_location_set(pm_parameters_node_t *params, pm_node_t *param) {
4655 if (params->base.location.start == NULL) {
4656 params->base.location.start = param->location.start;
4657 } else {
4658 params->base.location.start = params->base.location.start < param->location.start ? params->base.location.start : param->location.start;
4659 }
4660
4661 if (params->base.location.end == NULL) {
4662 params->base.location.end = param->location.end;
4663 } else {
4664 params->base.location.end = params->base.location.end > param->location.end ? params->base.location.end : param->location.end;
4665 }
4666}
4667
4671static void
4672pm_parameters_node_requireds_append(pm_parameters_node_t *params, pm_node_t *param) {
4673 pm_parameters_node_location_set(params, param);
4674 pm_node_list_append(&params->requireds, param);
4675}
4676
4680static void
4681pm_parameters_node_optionals_append(pm_parameters_node_t *params, pm_optional_parameter_node_t *param) {
4682 pm_parameters_node_location_set(params, (pm_node_t *) param);
4683 pm_node_list_append(&params->optionals, (pm_node_t *) param);
4684}
4685
4689static void
4690pm_parameters_node_posts_append(pm_parameters_node_t *params, pm_node_t *param) {
4691 pm_parameters_node_location_set(params, param);
4692 pm_node_list_append(&params->posts, param);
4693}
4694
4698static void
4699pm_parameters_node_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
4700 pm_parameters_node_location_set(params, param);
4701 params->rest = param;
4702}
4703
4707static void
4708pm_parameters_node_keywords_append(pm_parameters_node_t *params, pm_node_t *param) {
4709 pm_parameters_node_location_set(params, param);
4710 pm_node_list_append(&params->keywords, param);
4711}
4712
4716static void
4717pm_parameters_node_keyword_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
4718 assert(params->keyword_rest == NULL);
4719 pm_parameters_node_location_set(params, param);
4720 params->keyword_rest = param;
4721}
4722
4726static void
4727pm_parameters_node_block_set(pm_parameters_node_t *params, pm_block_parameter_node_t *param) {
4728 assert(params->block == NULL);
4729 pm_parameters_node_location_set(params, (pm_node_t *) param);
4730 params->block = param;
4731}
4732
4736static pm_program_node_t *
4737pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_statements_node_t *statements) {
4738 pm_program_node_t *node = PM_ALLOC_NODE(parser, pm_program_node_t);
4739
4740 *node = (pm_program_node_t) {
4741 {
4742 .type = PM_PROGRAM_NODE,
4743 .location = {
4744 .start = statements == NULL ? parser->start : statements->base.location.start,
4745 .end = statements == NULL ? parser->end : statements->base.location.end
4746 }
4747 },
4748 .locals = *locals,
4749 .statements = statements
4750 };
4751
4752 return node;
4753}
4754
4758static pm_parentheses_node_t *
4759pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing) {
4760 pm_parentheses_node_t *node = PM_ALLOC_NODE(parser, pm_parentheses_node_t);
4761
4762 *node = (pm_parentheses_node_t) {
4763 {
4764 .type = PM_PARENTHESES_NODE,
4765 .location = {
4766 .start = opening->start,
4767 .end = closing->end
4768 }
4769 },
4770 .body = body,
4771 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4772 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
4773 };
4774
4775 return node;
4776}
4777
4782pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *operator, const pm_token_t *lparen, const pm_token_t *rparen) {
4783 pm_pinned_expression_node_t *node = PM_ALLOC_NODE(parser, pm_pinned_expression_node_t);
4784
4785 *node = (pm_pinned_expression_node_t) {
4786 {
4788 .location = {
4789 .start = operator->start,
4790 .end = rparen->end
4791 }
4792 },
4793 .expression = expression,
4794 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4795 .lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen),
4796 .rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen)
4797 };
4798
4799 return node;
4800}
4801
4806pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
4807 pm_pinned_variable_node_t *node = PM_ALLOC_NODE(parser, pm_pinned_variable_node_t);
4808
4809 *node = (pm_pinned_variable_node_t) {
4810 {
4812 .location = {
4813 .start = operator->start,
4814 .end = variable->location.end
4815 }
4816 },
4817 .variable = variable,
4818 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
4819 };
4820
4821 return node;
4822}
4823
4828pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
4829 pm_post_execution_node_t *node = PM_ALLOC_NODE(parser, pm_post_execution_node_t);
4830
4831 *node = (pm_post_execution_node_t) {
4832 {
4833 .type = PM_POST_EXECUTION_NODE,
4834 .location = {
4835 .start = keyword->start,
4836 .end = closing->end
4837 }
4838 },
4839 .statements = statements,
4840 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
4841 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4842 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
4843 };
4844
4845 return node;
4846}
4847
4852pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
4853 pm_pre_execution_node_t *node = PM_ALLOC_NODE(parser, pm_pre_execution_node_t);
4854
4855 *node = (pm_pre_execution_node_t) {
4856 {
4857 .type = PM_PRE_EXECUTION_NODE,
4858 .location = {
4859 .start = keyword->start,
4860 .end = closing->end
4861 }
4862 },
4863 .statements = statements,
4864 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
4865 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4866 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
4867 };
4868
4869 return node;
4870}
4871
4875static pm_range_node_t *
4876pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
4877 pm_assert_value_expression(parser, left);
4878 pm_assert_value_expression(parser, right);
4879
4880 pm_range_node_t *node = PM_ALLOC_NODE(parser, pm_range_node_t);
4881 pm_node_flags_t flags = 0;
4882
4883 // Indicate that this node an exclusive range if the operator is `...`.
4884 if (operator->type == PM_TOKEN_DOT_DOT_DOT || operator->type == PM_TOKEN_UDOT_DOT_DOT) {
4886 }
4887
4888 // Indicate that this node is a static literal (i.e., can be compiled with
4889 // a putobject in CRuby) if the left and right are implicit nil, explicit
4890 // nil, or integers.
4891 if (
4892 (left == NULL || PM_NODE_TYPE_P(left, PM_NIL_NODE) || PM_NODE_TYPE_P(left, PM_INTEGER_NODE)) &&
4893 (right == NULL || PM_NODE_TYPE_P(right, PM_NIL_NODE) || PM_NODE_TYPE_P(right, PM_INTEGER_NODE))
4894 ) {
4895 flags |= PM_NODE_FLAG_STATIC_LITERAL;
4896 }
4897
4898 *node = (pm_range_node_t) {
4899 {
4900 .type = PM_RANGE_NODE,
4901 .flags = flags,
4902 .location = {
4903 .start = (left == NULL ? operator->start : left->location.start),
4904 .end = (right == NULL ? operator->end : right->location.end)
4905 }
4906 },
4907 .left = left,
4908 .right = right,
4909 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
4910 };
4911
4912 return node;
4913}
4914
4918static pm_redo_node_t *
4919pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
4920 assert(token->type == PM_TOKEN_KEYWORD_REDO);
4921 pm_redo_node_t *node = PM_ALLOC_NODE(parser, pm_redo_node_t);
4922
4923 *node = (pm_redo_node_t) {{ .type = PM_REDO_NODE, .location = PM_LOCATION_TOKEN_VALUE(token) }};
4924 return node;
4925}
4926
4932pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
4933 pm_regular_expression_node_t *node = PM_ALLOC_NODE(parser, pm_regular_expression_node_t);
4934
4936 {
4938 .flags = pm_regular_expression_flags_create(closing) | PM_NODE_FLAG_STATIC_LITERAL,
4939 .location = {
4940 .start = MIN(opening->start, closing->start),
4941 .end = MAX(opening->end, closing->end)
4942 }
4943 },
4944 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4945 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
4946 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
4947 .unescaped = *unescaped
4948 };
4949
4950 return node;
4951}
4952
4956static inline pm_regular_expression_node_t *
4957pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
4958 return pm_regular_expression_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
4959}
4960
4965pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
4966 pm_required_parameter_node_t *node = PM_ALLOC_NODE(parser, pm_required_parameter_node_t);
4967
4969 {
4971 .location = PM_LOCATION_TOKEN_VALUE(token)
4972 },
4973 .name = pm_parser_constant_id_token(parser, token)
4974 };
4975
4976 return node;
4977}
4978
4983pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *keyword, pm_node_t *rescue_expression) {
4984 pm_rescue_modifier_node_t *node = PM_ALLOC_NODE(parser, pm_rescue_modifier_node_t);
4985
4986 *node = (pm_rescue_modifier_node_t) {
4987 {
4989 .location = {
4990 .start = expression->location.start,
4991 .end = rescue_expression->location.end
4992 }
4993 },
4994 .expression = expression,
4995 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
4996 .rescue_expression = rescue_expression
4997 };
4998
4999 return node;
5000}
5001
5005static pm_rescue_node_t *
5006pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
5007 pm_rescue_node_t *node = PM_ALLOC_NODE(parser, pm_rescue_node_t);
5008
5009 *node = (pm_rescue_node_t) {
5010 {
5011 .type = PM_RESCUE_NODE,
5012 .location = PM_LOCATION_TOKEN_VALUE(keyword)
5013 },
5014 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
5015 .operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
5016 .reference = NULL,
5017 .statements = NULL,
5018 .consequent = NULL,
5019 .exceptions = { 0 }
5020 };
5021
5022 return node;
5023}
5024
5025static inline void
5026pm_rescue_node_operator_set(pm_rescue_node_t *node, const pm_token_t *operator) {
5027 node->operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
5028}
5029
5033static void
5034pm_rescue_node_reference_set(pm_rescue_node_t *node, pm_node_t *reference) {
5035 node->reference = reference;
5036 node->base.location.end = reference->location.end;
5037}
5038
5042static void
5043pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *statements) {
5044 node->statements = statements;
5045 if (pm_statements_node_body_length(statements) > 0) {
5046 node->base.location.end = statements->base.location.end;
5047 }
5048}
5049
5053static void
5054pm_rescue_node_consequent_set(pm_rescue_node_t *node, pm_rescue_node_t *consequent) {
5055 node->consequent = consequent;
5056 node->base.location.end = consequent->base.location.end;
5057}
5058
5062static void
5063pm_rescue_node_exceptions_append(pm_rescue_node_t *node, pm_node_t *exception) {
5064 pm_node_list_append(&node->exceptions, exception);
5065 node->base.location.end = exception->location.end;
5066}
5067
5072pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
5073 pm_rest_parameter_node_t *node = PM_ALLOC_NODE(parser, pm_rest_parameter_node_t);
5074
5075 *node = (pm_rest_parameter_node_t) {
5076 {
5077 .type = PM_REST_PARAMETER_NODE,
5078 .location = {
5079 .start = operator->start,
5080 .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
5081 }
5082 },
5083 .name = pm_parser_optional_constant_id_token(parser, name),
5084 .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
5085 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5086 };
5087
5088 return node;
5089}
5090
5094static pm_retry_node_t *
5095pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) {
5096 assert(token->type == PM_TOKEN_KEYWORD_RETRY);
5097 pm_retry_node_t *node = PM_ALLOC_NODE(parser, pm_retry_node_t);
5098
5099 *node = (pm_retry_node_t) {{ .type = PM_RETRY_NODE, .location = PM_LOCATION_TOKEN_VALUE(token) }};
5100 return node;
5101}
5102
5106static pm_return_node_t *
5107pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
5108 pm_return_node_t *node = PM_ALLOC_NODE(parser, pm_return_node_t);
5109
5110 *node = (pm_return_node_t) {
5111 {
5112 .type = PM_RETURN_NODE,
5113 .location = {
5114 .start = keyword->start,
5115 .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
5116 }
5117 },
5118 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
5119 .arguments = arguments
5120 };
5121
5122 return node;
5123}
5124
5128static pm_self_node_t *
5129pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) {
5130 assert(token->type == PM_TOKEN_KEYWORD_SELF);
5131 pm_self_node_t *node = PM_ALLOC_NODE(parser, pm_self_node_t);
5132
5133 *node = (pm_self_node_t) {{
5134 .type = PM_SELF_NODE,
5135 .location = PM_LOCATION_TOKEN_VALUE(token)
5136 }};
5137
5138 return node;
5139}
5140
5145pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, const pm_token_t *operator, pm_node_t *expression, pm_node_t *body, const pm_token_t *end_keyword) {
5146 pm_singleton_class_node_t *node = PM_ALLOC_NODE(parser, pm_singleton_class_node_t);
5147
5148 *node = (pm_singleton_class_node_t) {
5149 {
5151 .location = {
5152 .start = class_keyword->start,
5153 .end = end_keyword->end
5154 }
5155 },
5156 .locals = *locals,
5157 .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
5158 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5159 .expression = expression,
5160 .body = body,
5161 .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
5162 };
5163
5164 return node;
5165}
5166
5171pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) {
5172 assert(token->type == PM_TOKEN_KEYWORD___ENCODING__);
5173 pm_source_encoding_node_t *node = PM_ALLOC_NODE(parser, pm_source_encoding_node_t);
5174
5175 *node = (pm_source_encoding_node_t) {{
5177 .flags = PM_NODE_FLAG_STATIC_LITERAL,
5178 .location = PM_LOCATION_TOKEN_VALUE(token)
5179 }};
5180
5181 return node;
5182}
5183
5188pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword) {
5189 pm_source_file_node_t *node = PM_ALLOC_NODE(parser, pm_source_file_node_t);
5190 assert(file_keyword->type == PM_TOKEN_KEYWORD___FILE__);
5191
5192 *node = (pm_source_file_node_t) {
5193 {
5194 .type = PM_SOURCE_FILE_NODE,
5195 .flags = PM_NODE_FLAG_STATIC_LITERAL,
5196 .location = PM_LOCATION_TOKEN_VALUE(file_keyword),
5197 },
5198 .filepath = parser->filepath_string,
5199 };
5200
5201 return node;
5202}
5203
5207static pm_source_line_node_t *
5208pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) {
5209 assert(token->type == PM_TOKEN_KEYWORD___LINE__);
5210 pm_source_line_node_t *node = PM_ALLOC_NODE(parser, pm_source_line_node_t);
5211
5212 *node = (pm_source_line_node_t) {{
5213 .type = PM_SOURCE_LINE_NODE,
5214 .flags = PM_NODE_FLAG_STATIC_LITERAL,
5215 .location = PM_LOCATION_TOKEN_VALUE(token)
5216 }};
5217
5218 return node;
5219}
5220
5224static pm_splat_node_t *
5225pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
5226 pm_splat_node_t *node = PM_ALLOC_NODE(parser, pm_splat_node_t);
5227
5228 *node = (pm_splat_node_t) {
5229 {
5230 .type = PM_SPLAT_NODE,
5231 .location = {
5232 .start = operator->start,
5233 .end = (expression == NULL ? operator->end : expression->location.end)
5234 }
5235 },
5236 .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5237 .expression = expression
5238 };
5239
5240 return node;
5241}
5242
5246static pm_statements_node_t *
5247pm_statements_node_create(pm_parser_t *parser) {
5248 pm_statements_node_t *node = PM_ALLOC_NODE(parser, pm_statements_node_t);
5249
5250 *node = (pm_statements_node_t) {
5251 {
5252 .type = PM_STATEMENTS_NODE,
5253 .location = PM_LOCATION_NULL_VALUE(parser)
5254 },
5255 .body = { 0 }
5256 };
5257
5258 return node;
5259}
5260
5264static size_t
5265pm_statements_node_body_length(pm_statements_node_t *node) {
5266 return node && node->body.size;
5267}
5268
5272static void
5273pm_statements_node_location_set(pm_statements_node_t *node, const uint8_t *start, const uint8_t *end) {
5274 node->base.location = (pm_location_t) { .start = start, .end = end };
5275}
5276
5280static void
5281pm_statements_node_body_append(pm_statements_node_t *node, pm_node_t *statement) {
5282 if (pm_statements_node_body_length(node) == 0 || statement->location.start < node->base.location.start) {
5283 node->base.location.start = statement->location.start;
5284 }
5285 if (statement->location.end > node->base.location.end) {
5286 node->base.location.end = statement->location.end;
5287 }
5288
5289 pm_node_list_append(&node->body, statement);
5290
5291 // Every statement gets marked as a place where a newline can occur.
5292 pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
5293}
5294
5298static inline pm_string_node_t *
5299pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) {
5300 pm_string_node_t *node = PM_ALLOC_NODE(parser, pm_string_node_t);
5301 pm_node_flags_t flags = 0;
5302
5303 if (parser->frozen_string_literal) {
5304 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
5305 }
5306
5307 *node = (pm_string_node_t) {
5308 {
5309 .type = PM_STRING_NODE,
5310 .flags = flags,
5311 .location = {
5312 .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? content->start : opening->start),
5313 .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? content->end : closing->end)
5314 }
5315 },
5316 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5317 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
5318 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5319 .unescaped = *string
5320 };
5321
5322 return node;
5323}
5324
5328static pm_string_node_t *
5329pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
5330 return pm_string_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
5331}
5332
5337static pm_string_node_t *
5338pm_string_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
5339 pm_string_node_t *node = pm_string_node_create_unescaped(parser, opening, content, closing, &parser->current_string);
5341 return node;
5342}
5343
5347static pm_super_node_t *
5348pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_t *arguments) {
5349 assert(keyword->type == PM_TOKEN_KEYWORD_SUPER);
5350 pm_super_node_t *node = PM_ALLOC_NODE(parser, pm_super_node_t);
5351
5352 const uint8_t *end = pm_arguments_end(arguments);
5353 if (end == NULL) {
5354 assert(false && "unreachable");
5355 }
5356
5357 *node = (pm_super_node_t) {
5358 {
5359 .type = PM_SUPER_NODE,
5360 .location = {
5361 .start = keyword->start,
5362 .end = end,
5363 }
5364 },
5365 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
5366 .lparen_loc = arguments->opening_loc,
5367 .arguments = arguments->arguments,
5368 .rparen_loc = arguments->closing_loc,
5369 .block = arguments->block
5370 };
5371
5372 return node;
5373}
5374
5379static pm_symbol_node_t *
5380pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped) {
5381 pm_symbol_node_t *node = PM_ALLOC_NODE(parser, pm_symbol_node_t);
5382
5383 *node = (pm_symbol_node_t) {
5384 {
5385 .type = PM_SYMBOL_NODE,
5386 .flags = PM_NODE_FLAG_STATIC_LITERAL,
5387 .location = {
5388 .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? value->start : opening->start),
5389 .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? value->end : closing->end)
5390 }
5391 },
5392 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5393 .value_loc = PM_LOCATION_TOKEN_VALUE(value),
5394 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5395 .unescaped = *unescaped
5396 };
5397
5398 return node;
5399}
5400
5404static inline pm_symbol_node_t *
5405pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
5406 return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY);
5407}
5408
5412static pm_symbol_node_t *
5413pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
5414 pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string);
5416 return node;
5417}
5418
5422static pm_symbol_node_t *
5423pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
5424 pm_symbol_node_t *node;
5425
5426 switch (token->type) {
5427 case PM_TOKEN_LABEL: {
5428 pm_token_t opening = not_provided(parser);
5429 pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end };
5430
5431 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 };
5432 node = pm_symbol_node_create(parser, &opening, &label, &closing);
5433
5434 assert((label.end - label.start) >= 0);
5435 pm_string_shared_init(&node->unescaped, label.start, label.end);
5436 break;
5437 }
5438 case PM_TOKEN_MISSING: {
5439 pm_token_t opening = not_provided(parser);
5440 pm_token_t closing = not_provided(parser);
5441
5442 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end };
5443 node = pm_symbol_node_create(parser, &opening, &label, &closing);
5444 break;
5445 }
5446 default:
5447 assert(false && "unreachable");
5448 node = NULL;
5449 break;
5450 }
5451
5452 return node;
5453}
5454
5458static bool
5459pm_symbol_node_label_p(pm_node_t *node) {
5460 const uint8_t *end = NULL;
5461
5462 switch (PM_NODE_TYPE(node)) {
5463 case PM_SYMBOL_NODE:
5464 end = ((pm_symbol_node_t *) node)->closing_loc.end;
5465 break;
5467 end = ((pm_interpolated_symbol_node_t *) node)->closing_loc.end;
5468 break;
5469 default:
5470 return false;
5471 }
5472
5473 return (end != NULL) && (end[-1] == ':');
5474}
5475
5479static pm_symbol_node_t *
5480pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const pm_token_t *opening, const pm_token_t *closing) {
5481 pm_symbol_node_t *new_node = PM_ALLOC_NODE(parser, pm_symbol_node_t);
5482
5483 *new_node = (pm_symbol_node_t) {
5484 {
5485 .type = PM_SYMBOL_NODE,
5486 .flags = PM_NODE_FLAG_STATIC_LITERAL,
5487 .location = {
5488 .start = opening->start,
5489 .end = closing->end
5490 }
5491 },
5492 .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5493 .value_loc = node->content_loc,
5494 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5495 .unescaped = node->unescaped
5496 };
5497
5498 // We are explicitly _not_ using pm_node_destroy here because we don't want
5499 // to trash the unescaped string. We could instead copy the string if we
5500 // know that it is owned, but we're taking the fast path for now.
5501 free(node);
5502
5503 return new_node;
5504}
5505
5509static pm_string_node_t *
5510pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
5511 pm_string_node_t *new_node = PM_ALLOC_NODE(parser, pm_string_node_t);
5512 pm_node_flags_t flags = 0;
5513
5514 if (parser->frozen_string_literal) {
5515 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
5516 }
5517
5518 *new_node = (pm_string_node_t) {
5519 {
5520 .type = PM_STRING_NODE,
5521 .flags = flags,
5522 .location = node->base.location
5523 },
5524 .opening_loc = node->opening_loc,
5525 .content_loc = node->value_loc,
5526 .closing_loc = node->closing_loc,
5527 .unescaped = node->unescaped
5528 };
5529
5530 // We are explicitly _not_ using pm_node_destroy here because we don't want
5531 // to trash the unescaped string. We could instead copy the string if we
5532 // know that it is owned, but we're taking the fast path for now.
5533 free(node);
5534
5535 return new_node;
5536}
5537
5541static pm_true_node_t *
5542pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) {
5543 assert(token->type == PM_TOKEN_KEYWORD_TRUE);
5544 pm_true_node_t *node = PM_ALLOC_NODE(parser, pm_true_node_t);
5545
5546 *node = (pm_true_node_t) {{
5547 .type = PM_TRUE_NODE,
5548 .flags = PM_NODE_FLAG_STATIC_LITERAL,
5549 .location = PM_LOCATION_TOKEN_VALUE(token)
5550 }};
5551
5552 return node;
5553}
5554
5558static pm_undef_node_t *
5559pm_undef_node_create(pm_parser_t *parser, const pm_token_t *token) {
5560 assert(token->type == PM_TOKEN_KEYWORD_UNDEF);
5561 pm_undef_node_t *node = PM_ALLOC_NODE(parser, pm_undef_node_t);
5562
5563 *node = (pm_undef_node_t) {
5564 {
5565 .type = PM_UNDEF_NODE,
5566 .location = PM_LOCATION_TOKEN_VALUE(token),
5567 },
5568 .keyword_loc = PM_LOCATION_TOKEN_VALUE(token),
5569 .names = { 0 }
5570 };
5571
5572 return node;
5573}
5574
5578static void
5579pm_undef_node_append(pm_undef_node_t *node, pm_node_t *name) {
5580 node->base.location.end = name->location.end;
5581 pm_node_list_append(&node->names, name);
5582}
5583
5587static pm_unless_node_t *
5588pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, const pm_token_t *then_keyword, pm_statements_node_t *statements) {
5589 pm_conditional_predicate(predicate);
5590 pm_unless_node_t *node = PM_ALLOC_NODE(parser, pm_unless_node_t);
5591
5592 const uint8_t *end;
5593 if (statements != NULL) {
5594 end = statements->base.location.end;
5595 } else {
5596 end = predicate->location.end;
5597 }
5598
5599 *node = (pm_unless_node_t) {
5600 {
5601 .type = PM_UNLESS_NODE,
5602 .flags = PM_NODE_FLAG_NEWLINE,
5603 .location = {
5604 .start = keyword->start,
5605 .end = end
5606 },
5607 },
5608 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
5609 .predicate = predicate,
5610 .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
5611 .statements = statements,
5612 .consequent = NULL,
5613 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
5614 };
5615
5616 return node;
5617}
5618
5622static pm_unless_node_t *
5623pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *unless_keyword, pm_node_t *predicate) {
5624 pm_conditional_predicate(predicate);
5625 pm_unless_node_t *node = PM_ALLOC_NODE(parser, pm_unless_node_t);
5626
5627 pm_statements_node_t *statements = pm_statements_node_create(parser);
5628 pm_statements_node_body_append(statements, statement);
5629
5630 *node = (pm_unless_node_t) {
5631 {
5632 .type = PM_UNLESS_NODE,
5633 .flags = PM_NODE_FLAG_NEWLINE,
5634 .location = {
5635 .start = statement->location.start,
5636 .end = predicate->location.end
5637 },
5638 },
5639 .keyword_loc = PM_LOCATION_TOKEN_VALUE(unless_keyword),
5640 .predicate = predicate,
5641 .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
5642 .statements = statements,
5643 .consequent = NULL,
5644 .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
5645 };
5646
5647 return node;
5648}
5649
5650static inline void
5651pm_unless_node_end_keyword_loc_set(pm_unless_node_t *node, const pm_token_t *end_keyword) {
5652 node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
5653 node->base.location.end = end_keyword->end;
5654}
5655
5659static pm_until_node_t *
5660pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
5661 pm_until_node_t *node = PM_ALLOC_NODE(parser, pm_until_node_t);
5662
5663 *node = (pm_until_node_t) {
5664 {
5665 .type = PM_UNTIL_NODE,
5666 .flags = flags,
5667 .location = {
5668 .start = keyword->start,
5669 .end = closing->end,
5670 },
5671 },
5672 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
5673 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5674 .predicate = predicate,
5675 .statements = statements
5676 };
5677
5678 return node;
5679}
5680
5684static pm_until_node_t *
5685pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
5686 pm_until_node_t *node = PM_ALLOC_NODE(parser, pm_until_node_t);
5687
5688 *node = (pm_until_node_t) {
5689 {
5690 .type = PM_UNTIL_NODE,
5691 .flags = flags,
5692 .location = {
5693 .start = statements->base.location.start,
5694 .end = predicate->location.end,
5695 },
5696 },
5697 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
5698 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
5699 .predicate = predicate,
5700 .statements = statements
5701 };
5702
5703 return node;
5704}
5705
5709static pm_when_node_t *
5710pm_when_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
5711 pm_when_node_t *node = PM_ALLOC_NODE(parser, pm_when_node_t);
5712
5713 *node = (pm_when_node_t) {
5714 {
5715 .type = PM_WHEN_NODE,
5716 .location = {
5717 .start = keyword->start,
5718 .end = NULL
5719 }
5720 },
5721 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
5722 .statements = NULL,
5723 .conditions = { 0 }
5724 };
5725
5726 return node;
5727}
5728
5732static void
5733pm_when_node_conditions_append(pm_when_node_t *node, pm_node_t *condition) {
5734 node->base.location.end = condition->location.end;
5735 pm_node_list_append(&node->conditions, condition);
5736}
5737
5741static void
5742pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statements) {
5743 if (statements->base.location.end > node->base.location.end) {
5744 node->base.location.end = statements->base.location.end;
5745 }
5746
5747 node->statements = statements;
5748}
5749
5753static pm_while_node_t *
5754pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
5755 pm_while_node_t *node = PM_ALLOC_NODE(parser, pm_while_node_t);
5756
5757 *node = (pm_while_node_t) {
5758 {
5759 .type = PM_WHILE_NODE,
5760 .flags = flags,
5761 .location = {
5762 .start = keyword->start,
5763 .end = closing->end
5764 },
5765 },
5766 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
5767 .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5768 .predicate = predicate,
5769 .statements = statements
5770 };
5771
5772 return node;
5773}
5774
5778static pm_while_node_t *
5779pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
5780 pm_while_node_t *node = PM_ALLOC_NODE(parser, pm_while_node_t);
5781
5782 *node = (pm_while_node_t) {
5783 {
5784 .type = PM_WHILE_NODE,
5785 .flags = flags,
5786 .location = {
5787 .start = statements->base.location.start,
5788 .end = predicate->location.end
5789 },
5790 },
5791 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
5792 .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
5793 .predicate = predicate,
5794 .statements = statements
5795 };
5796
5797 return node;
5798}
5799
5804static pm_x_string_node_t *
5805pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
5806 pm_x_string_node_t *node = PM_ALLOC_NODE(parser, pm_x_string_node_t);
5807
5808 *node = (pm_x_string_node_t) {
5809 {
5810 .type = PM_X_STRING_NODE,
5811 .flags = PM_STRING_FLAGS_FROZEN,
5812 .location = {
5813 .start = opening->start,
5814 .end = closing->end
5815 },
5816 },
5817 .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5818 .content_loc = PM_LOCATION_TOKEN_VALUE(content),
5819 .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
5820 .unescaped = *unescaped
5821 };
5822
5823 return node;
5824}
5825
5829static inline pm_x_string_node_t *
5830pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
5831 return pm_xstring_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
5832}
5833
5837static pm_yield_node_t *
5838pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_location_t *lparen_loc, pm_arguments_node_t *arguments, const pm_location_t *rparen_loc) {
5839 pm_yield_node_t *node = PM_ALLOC_NODE(parser, pm_yield_node_t);
5840
5841 const uint8_t *end;
5842 if (rparen_loc->start != NULL) {
5843 end = rparen_loc->end;
5844 } else if (arguments != NULL) {
5845 end = arguments->base.location.end;
5846 } else if (lparen_loc->start != NULL) {
5847 end = lparen_loc->end;
5848 } else {
5849 end = keyword->end;
5850 }
5851
5852 *node = (pm_yield_node_t) {
5853 {
5854 .type = PM_YIELD_NODE,
5855 .location = {
5856 .start = keyword->start,
5857 .end = end
5858 },
5859 },
5860 .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
5861 .lparen_loc = *lparen_loc,
5862 .arguments = arguments,
5863 .rparen_loc = *rparen_loc
5864 };
5865
5866 return node;
5867}
5868
5869#undef PM_ALLOC_NODE
5870
5871/******************************************************************************/
5872/* Scope-related functions */
5873/******************************************************************************/
5874
5878static bool
5879pm_parser_scope_push(pm_parser_t *parser, bool closed) {
5880 pm_scope_t *scope = (pm_scope_t *) malloc(sizeof(pm_scope_t));
5881 if (scope == NULL) return false;
5882
5883 *scope = (pm_scope_t) {
5884 .previous = parser->current_scope,
5885 .closed = closed,
5886 .explicit_params = false,
5887 .numbered_parameters = 0,
5888 };
5889
5890 pm_constant_id_list_init(&scope->locals);
5891 parser->current_scope = scope;
5892
5893 return true;
5894}
5895
5900static int
5901pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant_id) {
5902 pm_scope_t *scope = parser->current_scope;
5903 int depth = 0;
5904
5905 while (scope != NULL) {
5906 if (pm_constant_id_list_includes(&scope->locals, constant_id)) return depth;
5907 if (scope->closed) break;
5908
5909 scope = scope->previous;
5910 depth++;
5911 }
5912
5913 return -1;
5914}
5915
5921static inline int
5922pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
5923 return pm_parser_local_depth_constant_id(parser, pm_parser_constant_id_token(parser, token));
5924}
5925
5929static inline void
5930pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id) {
5931 if (!pm_constant_id_list_includes(&parser->current_scope->locals, constant_id)) {
5932 pm_constant_id_list_append(&parser->current_scope->locals, constant_id);
5933 }
5934}
5935
5939static inline void
5940pm_parser_numbered_parameters_set(pm_parser_t *parser, uint8_t numbered_parameters) {
5941 parser->current_scope->numbered_parameters = numbered_parameters;
5942}
5943
5947static pm_constant_id_t
5948pm_parser_local_add_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
5949 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, start, end);
5950 if (constant_id != 0) pm_parser_local_add(parser, constant_id);
5951 return constant_id;
5952}
5953
5957static inline void
5958pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token) {
5959 pm_parser_local_add_location(parser, token->start, token->end);
5960}
5961
5965static pm_constant_id_t
5966pm_parser_local_add_owned(pm_parser_t *parser, const uint8_t *start, size_t length) {
5967 pm_constant_id_t constant_id = pm_parser_constant_id_owned(parser, start, length);
5968 if (constant_id != 0) pm_parser_local_add(parser, constant_id);
5969 return constant_id;
5970}
5971
5976static void
5977pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
5978 // We want to check whether the parameter name is a numbered parameter or
5979 // not.
5980 pm_refute_numbered_parameter(parser, name->start, name->end);
5981
5982 // We want to ignore any parameter name that starts with an underscore.
5983 if ((name->start < name->end) && (*name->start == '_')) return;
5984
5985 // Otherwise we'll fetch the constant id for the parameter name and check
5986 // whether it's already in the current scope.
5987 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
5988
5989 if (pm_constant_id_list_includes(&parser->current_scope->locals, constant_id)) {
5990 pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_REPEAT);
5991 }
5992}
5993
5999static void
6000pm_parser_scope_pop(pm_parser_t *parser) {
6001 pm_scope_t *scope = parser->current_scope;
6002 parser->current_scope = scope->previous;
6003 free(scope);
6004}
6005
6006/******************************************************************************/
6007/* Basic character checks */
6008/******************************************************************************/
6009
6016static inline size_t
6017char_is_identifier_start(pm_parser_t *parser, const uint8_t *b) {
6018 if (parser->encoding_changed) {
6019 size_t width;
6020 if ((width = parser->encoding->alpha_char(b, parser->end - b)) != 0) {
6021 return width;
6022 } else if (*b == '_') {
6023 return 1;
6024 } else if (*b >= 0x80) {
6025 return parser->encoding->char_width(b, parser->end - b);
6026 } else {
6027 return 0;
6028 }
6029 } else if (*b < 0x80) {
6030 return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
6031 } else {
6032 return (size_t) (pm_encoding_utf_8_alpha_char(b, parser->end - b) || 1u);
6033 }
6034}
6035
6040static inline size_t
6041char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
6042 if (*b < 0x80) {
6043 return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
6044 } else {
6045 return (size_t) (pm_encoding_utf_8_alnum_char(b, end - b) || 1u);
6046 }
6047}
6048
6054static inline size_t
6055char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
6056 if (parser->encoding_changed) {
6057 size_t width;
6058 if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
6059 return width;
6060 } else if (*b == '_') {
6061 return 1;
6062 } else if (*b >= 0x80) {
6063 return parser->encoding->char_width(b, parser->end - b);
6064 } else {
6065 return 0;
6066 }
6067 }
6068 return char_is_identifier_utf8(b, parser->end);
6069}
6070
// Here we're defining a perfect hash for the characters that are allowed in
// global names. This is used to quickly check the next character after a $ to
// see if it's a valid character for a global name.
//
// The table is an array of 32-bit words covering the byte range 0x20..0x7e.
// BIT(c, idx) contributes the bit for character c to word idx (and nothing to
// any other word); PUNCT(idx) ORs together the bits of every allowed
// character for that word.
#define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
#define PUNCT(idx) ( \
    BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
    BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
    BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
    BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
    BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
    BIT('0', idx))

const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };

#undef BIT
#undef PUNCT

/**
 * Returns true if the given byte is one of the punctuation characters listed
 * in the table above. Bytes at or below 0x20 and bytes above 0x7e are never
 * accepted.
 */
static inline bool
char_is_global_name_punctuation(const uint8_t b) {
    const unsigned int code = (const unsigned int) b;

    const bool in_table_range = (code > 0x20) && (code <= 0x7e);
    if (!in_table_range) {
        return false;
    }

    // Select the 32-bit word for this byte, then test the byte's bit in it.
    const unsigned int word = pm_global_name_punctuation_hash[(code - 0x20) / 32];
    return (word >> (code % 32)) & 1;
}
6095
6096static inline bool
6097token_is_setter_name(pm_token_t *token) {
6098 return (
6099 (token->type == PM_TOKEN_IDENTIFIER) &&
6100 (token->end - token->start >= 2) &&
6101 (token->end[-1] == '=')
6102 );
6103}
6104
6105/******************************************************************************/
6106/* Stack helpers */
6107/******************************************************************************/
6108
6109static inline void
6110pm_accepts_block_stack_push(pm_parser_t *parser, bool value) {
6111 // Use the negation of the value to prevent stack overflow.
6112 pm_state_stack_push(&parser->accepts_block_stack, !value);
6113}
6114
6115static inline void
6116pm_accepts_block_stack_pop(pm_parser_t *parser) {
6117 pm_state_stack_pop(&parser->accepts_block_stack);
6118}
6119
6120static inline bool
6121pm_accepts_block_stack_p(pm_parser_t *parser) {
6122 return !pm_state_stack_p(&parser->accepts_block_stack);
6123}
6124
6125static inline void
6126pm_do_loop_stack_push(pm_parser_t *parser, bool value) {
6127 pm_state_stack_push(&parser->do_loop_stack, value);
6128}
6129
6130static inline void
6131pm_do_loop_stack_pop(pm_parser_t *parser) {
6132 pm_state_stack_pop(&parser->do_loop_stack);
6133}
6134
6135static inline bool
6136pm_do_loop_stack_p(pm_parser_t *parser) {
6137 return pm_state_stack_p(&parser->do_loop_stack);
6138}
6139
6140/******************************************************************************/
6141/* Lexer check helpers */
6142/******************************************************************************/
6143
6148static inline uint8_t
6149peek_at(pm_parser_t *parser, const uint8_t *cursor) {
6150 if (cursor < parser->end) {
6151 return *cursor;
6152 } else {
6153 return '\0';
6154 }
6155}
6156
6162static inline uint8_t
6163peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
6164 return peek_at(parser, parser->current.end + offset);
6165}
6166
6171static inline uint8_t
6172peek(pm_parser_t *parser) {
6173 return peek_at(parser, parser->current.end);
6174}
6175
6180static inline bool
6181match(pm_parser_t *parser, uint8_t value) {
6182 if (peek(parser) == value) {
6183 parser->current.end++;
6184 return true;
6185 }
6186 return false;
6187}
6188
6193static inline size_t
6194match_eol_at(pm_parser_t *parser, const uint8_t *cursor) {
6195 if (peek_at(parser, cursor) == '\n') {
6196 return 1;
6197 }
6198 if (peek_at(parser, cursor) == '\r' && peek_at(parser, cursor + 1) == '\n') {
6199 return 2;
6200 }
6201 return 0;
6202}
6203
6209static inline size_t
6210match_eol_offset(pm_parser_t *parser, ptrdiff_t offset) {
6211 return match_eol_at(parser, parser->current.end + offset);
6212}
6213
6219static inline size_t
6220match_eol(pm_parser_t *parser) {
6221 return match_eol_at(parser, parser->current.end);
6222}
6223
/**
 * Locate the first line feed within a span of bytes.
 *
 * @param cursor The first byte of the span.
 * @param length The number of bytes in the span; must not be negative.
 * @return A pointer to the first '\n' in the span, or NULL when there is none.
 */
static inline const uint8_t *
next_newline(const uint8_t *cursor, ptrdiff_t length) {
    assert(length >= 0);
    const size_t span = (size_t) length;

    // A plain byte search is sufficient here: per the original comment, none
    // of the supported encodings use \n as part of a multi-byte character.
    return (const uint8_t *) memchr(cursor, '\n', span);
}
6236
6241static bool
6242parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
6243 const pm_encoding_t *encoding = pm_encoding_find(start, end);
6244
6245 if (encoding != NULL) {
6246 if (encoding != PM_ENCODING_UTF_8_ENTRY) {
6247 parser->encoding = encoding;
6248 parser->encoding_changed = true;
6249 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
6250 }
6251
6252 return true;
6253 }
6254
6255 return false;
6256}
6257
// Scan the current comment token for the word "coding" (case-insensitive)
// followed by a `:` or `=` separator and an encoding name, and apply that
// encoding to the parser. Returns silently when no such marker is found; adds
// a PM_ERR_INVALID_ENCODING_MAGIC_COMMENT error when a name is found but
// parser_lex_magic_comment_encoding_value does not recognize it.
6262static void
6263parser_lex_magic_comment_encoding(pm_parser_t *parser) {
// Skip the first byte of the token and search up to the token's end.
6264 const uint8_t *cursor = parser->current.start + 1;
6265 const uint8_t *end = parser->current.end;
6266
6267 bool separator = false;
// First loop: find "coding". Each pass looks at the byte six positions ahead.
// If that byte is a letter of "coding", the cursor jumps forward by the
// distance that would put the end of the word just before the cursor, and the
// scan restarts. Otherwise the cursor advances six bytes and the six bytes
// behind it are compared against "coding".
6268 while (true) {
// Fewer than seven bytes left: cursor[6] would be out of range, so give up.
6269 if (end - cursor <= 6) return;
6270 switch (cursor[6]) {
6271 case 'C': case 'c': cursor += 6; continue;
6272 case 'O': case 'o': cursor += 5; continue;
6273 case 'D': case 'd': cursor += 4; continue;
6274 case 'I': case 'i': cursor += 3; continue;
6275 case 'N': case 'n': cursor += 2; continue;
6276 case 'G': case 'g': cursor += 1; continue;
6277 case '=': case ':':
// A separator directly after the candidate word; remember that it was seen.
6278 separator = true;
6279 cursor += 6;
6280 break;
6281 default:
6282 cursor += 6;
// Only whitespace after the candidate word is worth checking; anything else
// cannot terminate "coding", so keep scanning.
6283 if (pm_char_is_whitespace(*cursor)) break;
6284 continue;
6285 }
6286 if (pm_strncasecmp(cursor - 6, (const uint8_t *) "coding", 6) == 0) break;
// The six bytes were not "coding", so any separator seen does not count.
6287 separator = false;
6288 }
6289
// Second loop: step past whitespace, requiring a `=` or `:` separator if one
// was not already seen. Note that the cursor is pre-incremented at the top of
// each pass, including the pass that follows the `cursor++` below.
6290 while (true) {
6291 do {
6292 if (++cursor >= end) return;
6293 } while (pm_char_is_whitespace(*cursor));
6294
6295 if (separator) break;
6296 if (*cursor != '=' && *cursor != ':') return;
6297
6298 separator = true;
6299 cursor++;
6300 }
6301
// The value is the longest run of `-`, `_`, and bytes that the current
// encoding's alnum_char accepts, bounded by the end of the token.
6302 const uint8_t *value_start = cursor;
6303 while ((*cursor == '-' || *cursor == '_' || parser->encoding->alnum_char(cursor, 1)) && ++cursor < end);
6304
6305 if (!parser_lex_magic_comment_encoding_value(parser, value_start, cursor)) {
6306 // If we were unable to parse the encoding value, then we've got an
6307 // issue because we didn't understand the encoding that the user was
6308 // trying to use. In this case we'll keep using the default encoding but
6309 // add an error to the parser to indicate an unsuccessful parse.
6310 pm_parser_err(parser, value_start, cursor, PM_ERR_INVALID_ENCODING_MAGIC_COMMENT);
6311 }
6312}
6313
6318static void
6319parser_lex_magic_comment_frozen_string_literal_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
6320 if (start + 4 <= end && pm_strncasecmp(start, (const uint8_t *) "true", 4) == 0) {
6321 parser->frozen_string_literal = true;
6322 }
6323}
6324
/**
 * Report whether a byte separates keys in a magic comment: a single quote, a
 * double quote, a colon, or a semicolon.
 *
 * @param b The byte to classify.
 * @return true for one of the four delimiter bytes, false otherwise.
 */
static inline bool
pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
    switch (b) {
        case '\'':
        case '"':
        case ':':
        case ';':
            return true;
        default:
            return false;
    }
}
6329
6335static inline const uint8_t *
6336parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) {
6337 while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor, '-', (size_t) (end - cursor), parser->encoding_changed, parser->encoding)) != NULL) {
6338 if (cursor + 3 <= end && cursor[1] == '*' && cursor[2] == '-') {
6339 return cursor;
6340 }
6341 cursor++;
6342 }
6343 return NULL;
6344}
6345
// Parse the current comment token as a magic comment made of `key: value`
// pairs, optionally wrapped in a pair of `-*-` markers.
//
// For every pair found, a pm_magic_comment_t entry is appended to
// parser->magic_comment_list. Two keys also change parser state:
//   - `encoding` / `coding` (only when the token starts at
//     parser->encoding_comment_start) is passed to
//     parser_lex_magic_comment_encoding_value.
//   - `frozen_string_literal` (only when semantic_token_seen is false) is
//     passed to parser_lex_magic_comment_frozen_string_literal_value.
// Dashes in a key are treated as underscores before these comparisons.
//
// Returns false when the comment is too short, has an opening `-*-` without a
// closing one, or (outside of `-*-` markers) is not a single `key: value` pair
// spanning the rest of the comment. Otherwise returns true, unless an
// encoding key was processed, in which case the result of the encoding lookup
// is returned.
6356static inline bool
6357parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
6358 bool result = true;
6359
// Skip the first byte of the token; anything with seven or fewer bytes after
// it is rejected outright.
6360 const uint8_t *start = parser->current.start + 1;
6361 const uint8_t *end = parser->current.end;
6362 if (end - start <= 7) return false;
6363
6364 const uint8_t *cursor;
// `indicator` records that the pairs are enclosed in -*- ... -*- markers.
6365 bool indicator = false;
6366
6367 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
6368 start = cursor + 3;
6369
6370 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
6371 end = cursor;
6372 indicator = true;
6373 } else {
6374 // If we have a start marker but not an end marker, then we cannot
6375 // have a magic comment.
6376 return false;
6377 }
6378 }
6379
6380 cursor = start;
6381 while (cursor < end) {
// Skip leading delimiters and whitespace, then take the key as the run of
// bytes up to the next delimiter or whitespace.
6382 while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
6383
6384 const uint8_t *key_start = cursor;
6385 while (cursor < end && (!pm_char_is_magic_comment_key_delimiter(*cursor) && !pm_char_is_whitespace(*cursor))) cursor++;
6386
6387 const uint8_t *key_end = cursor;
6388 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
6389 if (cursor == end) break;
6390
// A key must be followed by a colon. Without one, the whole comment is
// rejected unless -*- markers are present, in which case scanning resumes
// from the current position.
6391 if (*cursor == ':') {
6392 cursor++;
6393 } else {
6394 if (!indicator) return false;
6395 continue;
6396 }
6397
6398 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
6399 if (cursor == end) break;
6400
6401 const uint8_t *value_start;
6402 const uint8_t *value_end;
6403
// A value is either double-quoted (a backslash skips the byte after it) or a
// bare run ending at a double quote, semicolon, or whitespace.
6404 if (*cursor == '"') {
6405 value_start = ++cursor;
6406 for (; cursor < end && *cursor != '"'; cursor++) {
6407 if (*cursor == '\\' && (cursor + 1 < end)) cursor++;
6408 }
6409 value_end = cursor;
6410 } else {
6411 value_start = cursor;
6412 while (cursor < end && *cursor != '"' && *cursor != ';' && !pm_char_is_whitespace(*cursor)) cursor++;
6413 value_end = cursor;
6414 }
6415
// Inside -*- markers, further pairs may follow after `;` or whitespace.
// Outside of them, only trailing whitespace may follow the value.
6416 if (indicator) {
6417 while (cursor < end && (*cursor == ';' || pm_char_is_whitespace(*cursor))) cursor++;
6418 } else {
6419 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
6420 if (cursor != end) return false;
6421 }
6422
6423 // Here, we need to do some processing on the key to swap out dashes for
6424 // underscores. We only need to do this if there _is_ a dash in the key.
6425 pm_string_t key;
6426 const size_t key_length = (size_t) (key_end - key_start);
6427 const uint8_t *dash = pm_memchr(key_start, '-', (size_t) key_length, parser->encoding_changed, parser->encoding);
6428
6429 if (dash == NULL) {
6430 pm_string_shared_init(&key, key_start, key_end);
6431 } else {
6432 size_t width = (size_t) (key_end - key_start);
6433 uint8_t *buffer = malloc(width);
// On allocation failure, stop processing pairs; `result` is returned as it
// currently stands.
6434 if (buffer == NULL) break;
6435
6436 memcpy(buffer, key_start, width);
6437 buffer[dash - key_start] = '_';
6438
6439 while ((dash = pm_memchr(dash + 1, '-', (size_t) (key_end - dash - 1), parser->encoding_changed, parser->encoding)) != NULL) {
6440 buffer[dash - key_start] = '_';
6441 }
6442
6443 pm_string_owned_init(&key, buffer, width);
6444 }
6445
6446 // Finally, we can start checking the key against the list of known
6447 // magic comment keys, and potentially change state based on that.
6448 const uint8_t *key_source = pm_string_source(&key);
6449
6450 // We only want to attempt to compare against encoding comments if it's
6451 // the first line in the file (or the second in the case of a shebang).
6452 if (parser->current.start == parser->encoding_comment_start) {
6453 if (
6454 (key_length == 8 && pm_strncasecmp(key_source, (const uint8_t *) "encoding", 8) == 0) ||
6455 (key_length == 6 && pm_strncasecmp(key_source, (const uint8_t *) "coding", 6) == 0)
6456 ) {
6457 result = parser_lex_magic_comment_encoding_value(parser, value_start, value_end);
6458 }
6459 }
6460
6461 // We only want to handle frozen string literal comments if it's before
6462 // any semantic tokens have been seen.
6463 if (!semantic_token_seen) {
6464 if (key_length == 21 && pm_strncasecmp(key_source, (const uint8_t *) "frozen_string_literal", 21) == 0) {
6465 parser_lex_magic_comment_frozen_string_literal_value(parser, value_start, value_end);
6466 }
6467 }
6468
6469 // When we're done, we want to free the string in case we had to
6470 // allocate memory for it.
6471 pm_string_free(&key);
6472
6473 // Allocate a new magic comment node to append to the parser's list.
// NOTE(review): `magic_comment` is not declared anywhere in the visible lines
// and the embedded numbering skips 6474 here; presumably the declaration was
// dropped from this listing. Confirm against the upstream file.
// If the allocation fails the pair is simply not recorded.
6475 if ((magic_comment = (pm_magic_comment_t *) calloc(sizeof(pm_magic_comment_t), 1)) != NULL) {
6476 magic_comment->key_start = key_start;
6477 magic_comment->value_start = value_start;
6478 magic_comment->key_length = (uint32_t) key_length;
6479 magic_comment->value_length = (uint32_t) (value_end - value_start);
6480 pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
6481 }
6482 }
6483
6484 return result;
6485}
6486
6487/******************************************************************************/
6488/* Context manipulations */
6489/******************************************************************************/
6490
// Report whether `token` is one of the token types that ends the given
// context. Contexts without a matching case fall out of the switch and yield
// false.
//
// NOTE(review): the embedded line numbers skip at several points in this
// function (for example 6494 -> 6496, 6496 -> 6498, 6513 -> 6515), and at
// those points a `return` directly follows another `return` with no case label
// in between. Case labels appear to be missing from this listing; confirm
// which contexts each return belongs to against the upstream file before
// relying on the mapping below.
6491static bool
6492context_terminator(pm_context_t context, pm_token_t *token) {
6493 switch (context) {
6494 case PM_CONTEXT_MAIN:
6496 return token->type == PM_TOKEN_EOF;
// NOTE(review): no case label is visible for the following return.
6498 return token->type == PM_TOKEN_COMMA || token->type == PM_TOKEN_PARENTHESIS_RIGHT;
6499 case PM_CONTEXT_PREEXE:
6500 case PM_CONTEXT_POSTEXE:
6501 return token->type == PM_TOKEN_BRACE_RIGHT;
6502 case PM_CONTEXT_MODULE:
6503 case PM_CONTEXT_CLASS:
6504 case PM_CONTEXT_SCLASS:
6506 case PM_CONTEXT_DEF:
6508 return token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ENSURE;
6509 case PM_CONTEXT_WHILE:
6510 case PM_CONTEXT_UNTIL:
6511 case PM_CONTEXT_ELSE:
6512 case PM_CONTEXT_FOR:
6513 case PM_CONTEXT_ENSURE:
6515 return token->type == PM_TOKEN_KEYWORD_END;
// NOTE(review): no case labels are visible for the next two returns.
6517 return token->type == PM_TOKEN_KEYWORD_IN;
6519 return token->type == PM_TOKEN_KEYWORD_WHEN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE;
6520 case PM_CONTEXT_CASE_IN:
6521 return token->type == PM_TOKEN_KEYWORD_IN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE;
6522 case PM_CONTEXT_IF:
6523 case PM_CONTEXT_ELSIF:
6524 return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_ELSIF || token->type == PM_TOKEN_KEYWORD_END;
6525 case PM_CONTEXT_UNLESS:
6526 return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END;
6527 case PM_CONTEXT_EMBEXPR:
6528 return token->type == PM_TOKEN_EMBEXPR_END;
// NOTE(review): no case label is visible for the following return.
6530 return token->type == PM_TOKEN_BRACE_RIGHT;
6531 case PM_CONTEXT_PARENS:
6532 return token->type == PM_TOKEN_PARENTHESIS_RIGHT;
6533 case PM_CONTEXT_BEGIN:
6534 case PM_CONTEXT_RESCUE:
6536 return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END;
// NOTE(review): no case labels are visible for the remaining returns.
6539 return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_END;
6541 return token->type == PM_TOKEN_BRACE_RIGHT;
6543 return token->type == PM_TOKEN_KEYWORD_THEN || token->type == PM_TOKEN_NEWLINE || token->type == PM_TOKEN_SEMICOLON;
6544 }
6545
6546 return false;
6547}
6548
6549static bool
6550context_recoverable(pm_parser_t *parser, pm_token_t *token) {
6551 pm_context_node_t *context_node = parser->current_context;
6552
6553 while (context_node != NULL) {
6554 if (context_terminator(context_node->context, token)) return true;
6555 context_node = context_node->prev;
6556 }
6557
6558 return false;
6559}
6560
6561static bool
6562context_push(pm_parser_t *parser, pm_context_t context) {
6563 pm_context_node_t *context_node = (pm_context_node_t *) malloc(sizeof(pm_context_node_t));
6564 if (context_node == NULL) return false;
6565
6566 *context_node = (pm_context_node_t) { .context = context, .prev = NULL };
6567
6568 if (parser->current_context == NULL) {
6569 parser->current_context = context_node;
6570 } else {
6571 context_node->prev = parser->current_context;
6572 parser->current_context = context_node;
6573 }
6574
6575 return true;
6576}
6577
6578static void
6579context_pop(pm_parser_t *parser) {
6580 pm_context_node_t *prev = parser->current_context->prev;
6581 free(parser->current_context);
6582 parser->current_context = prev;
6583}
6584
6585static bool
6586context_p(pm_parser_t *parser, pm_context_t context) {
6587 pm_context_node_t *context_node = parser->current_context;
6588
6589 while (context_node != NULL) {
6590 if (context_node->context == context) return true;
6591 context_node = context_node->prev;
6592 }
6593
6594 return false;
6595}
6596
// Walk the context stack from the innermost context outward and report
// whether a method-definition context is reached before a class, module, or
// singleton-class context. Returns false when neither kind is found.
6597static bool
6598context_def_p(pm_parser_t *parser) {
6599 pm_context_node_t *context_node = parser->current_context;
6600
6601 while (context_node != NULL) {
6602 switch (context_node->context) {
6603 case PM_CONTEXT_DEF:
// NOTE(review): the embedded numbering skips 6604-6607 here, so additional
// case labels that also return true may be missing from this listing; confirm
// against the upstream file.
6608 return true;
// Reaching one of these first means the walk has left any enclosing method.
6609 case PM_CONTEXT_CLASS:
6610 case PM_CONTEXT_MODULE:
6611 case PM_CONTEXT_SCLASS:
6612 return false;
6613 default:
// Any other context is transparent: keep walking outward.
6614 context_node = context_node->prev;
6615 }
6616 }
6617
6618 return false;
6619}
6620
6621/******************************************************************************/
6622/* Specific token lexers */
6623/******************************************************************************/
6624
6625static void
6626pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *invalid) {
6627 if (invalid != NULL) {
6628 pm_parser_err(parser, invalid, invalid + 1, PM_ERR_INVALID_NUMBER_UNDERSCORE);
6629 }
6630}
6631
6632static size_t
6633pm_strspn_binary_number_validate(pm_parser_t *parser, const uint8_t *string) {
6634 const uint8_t *invalid = NULL;
6635 size_t length = pm_strspn_binary_number(string, parser->end - string, &invalid);
6636 pm_strspn_number_validate(parser, invalid);
6637 return length;
6638}
6639
6640static size_t
6641pm_strspn_octal_number_validate(pm_parser_t *parser, const uint8_t *string) {
6642 const uint8_t *invalid = NULL;
6643 size_t length = pm_strspn_octal_number(string, parser->end - string, &invalid);
6644 pm_strspn_number_validate(parser, invalid);
6645 return length;
6646}
6647
6648static size_t
6649pm_strspn_decimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
6650 const uint8_t *invalid = NULL;
6651 size_t length = pm_strspn_decimal_number(string, parser->end - string, &invalid);
6652 pm_strspn_number_validate(parser, invalid);
6653 return length;
6654}
6655
6656static size_t
6657pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
6658 const uint8_t *invalid = NULL;
6659 size_t length = pm_strspn_hexadecimal_number(string, parser->end - string, &invalid);
6660 pm_strspn_number_validate(parser, invalid);
6661 return length;
6662}
6663
// Consume an optional fractional part (`.` followed by digits) and an optional
// exponent (`e`/`E`, optional sign, digits) after the integer part of a
// number. Sets *seen_e to true when an exponent marker was consumed; it is not
// written otherwise. Adds PM_ERR_INVALID_FLOAT_EXPONENT when the exponent
// marker is not followed by a digit.
//
// NOTE(review): `type` is returned but never declared or assigned in the
// visible lines, and the embedded numbering skips 6666, 6674, 6691 and 6694.
// Presumably the declaration and the assignments were dropped from this
// listing; confirm against the upstream file.
6664static pm_token_type_t
6665lex_optional_float_suffix(pm_parser_t *parser, bool* seen_e) {
6667
6668 // Here we're going to attempt to parse the optional decimal portion of a
6669 // float. If it's not there, then it's okay and we'll just continue on.
6670 if (peek(parser) == '.') {
6671 if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
// Consume the `.` and the first digit, then the rest of the digit run.
6672 parser->current.end += 2;
6673 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
6675 } else {
6676 // If we had a . and then something else, then it's not a float suffix on
6677 // a number it's a method call or something else.
6678 return type;
6679 }
6680 }
6681
6682 // Here we're going to attempt to parse the optional exponent portion of a
6683 // float. If it's not there, it's okay and we'll just continue on.
6684 if (match(parser, 'e') || match(parser, 'E')) {
// An optional sign; the result is deliberately ignored.
6685 (void) (match(parser, '+') || match(parser, '-'));
6686 *seen_e = true;
6687
6688 if (pm_char_is_decimal_digit(peek(parser))) {
6689 parser->current.end++;
6690 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
6692 } else {
6693 pm_parser_err_current(parser, PM_ERR_INVALID_FLOAT_EXPONENT);
6695 }
6696 }
6697
6698 return type;
6699}
6700
// Lex the body of a numeric literal whose first digit has already been
// consumed. When that digit was `0`, the next byte selects the form: `d`/`D`
// decimal, `b`/`B` binary, `o`/`O` or a bare digit/underscore octal, `x`/`X`
// hexadecimal, and `.` or `e`/`E` a float. Otherwise the rest of a decimal
// number and an optional float suffix are consumed. *seen_e is reset to false
// on entry and may be set by lex_optional_float_suffix.
//
// The switch dereferences parser->current.end directly, so the caller is
// expected to have checked that it is within the source.
//
// NOTE(review): `type` is used but not declared in the visible lines, and the
// embedded numbering skips 6703, 6730, 6743, 6757 and 6770. Presumably a
// declaration and one statement per prefixed branch were dropped from this
// listing; confirm against the upstream file.
6701static pm_token_type_t
6702lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
6704 *seen_e = false;
6705
6706 if (peek_offset(parser, -1) == '0') {
6707 switch (*parser->current.end) {
6708 // 0d1111 is a decimal number
6709 case 'd':
6710 case 'D':
6711 parser->current.end++;
6712 if (pm_char_is_decimal_digit(peek(parser))) {
6713 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
6714 } else {
6715 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
6716 }
6717
6718 break;
6719
6720 // 0b1111 is a binary number
6721 case 'b':
6722 case 'B':
6723 parser->current.end++;
6724 if (pm_char_is_binary_digit(peek(parser))) {
6725 parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
6726 } else {
6727 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
6728 }
6729
6731 break;
6732
6733 // 0o1111 is an octal number
6734 case 'o':
6735 case 'O':
6736 parser->current.end++;
6737 if (pm_char_is_octal_digit(peek(parser))) {
6738 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
6739 } else {
6740 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
6741 }
6742
6744 break;
6745
6746 // 01111 is an octal number
6747 case '_':
6748 case '0':
6749 case '1':
6750 case '2':
6751 case '3':
6752 case '4':
6753 case '5':
6754 case '6':
6755 case '7':
// Unlike the prefixed forms, the current byte is part of the digit run, so it
// is not skipped before scanning.
6756 parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
6758 break;
6759
6760 // 0x1111 is a hexadecimal number
6761 case 'x':
6762 case 'X':
6763 parser->current.end++;
6764 if (pm_char_is_hexadecimal_digit(peek(parser))) {
6765 parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
6766 } else {
6767 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
6768 }
6769
6771 break;
6772
6773 // 0.xxx is a float
6774 case '.': {
6775 type = lex_optional_float_suffix(parser, seen_e);
6776 break;
6777 }
6778
6779 // 0exxx is a float
6780 case 'e':
6781 case 'E': {
6782 type = lex_optional_float_suffix(parser, seen_e);
6783 break;
6784 }
6785 }
6786 } else {
6787 // If it didn't start with a 0, then we'll lex as far as we can into a
6788 // decimal number.
6789 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
6790
6791 // Afterward, we'll lex as far as we can into an optional float suffix.
6792 type = lex_optional_float_suffix(parser, seen_e);
6793 }
6794
6795 return type;
6796}
6797
// Lex a complete numeric literal, including an optional `r` (rational) and/or
// `i` (imaginary) suffix. The suffix is only accepted when it is not
// immediately followed by a letter, an underscore, or a non-ASCII byte;
// otherwise the lexer rewinds to the end of the digits and returns the
// unsuffixed type.
//
// NOTE(review): `type` is used but not declared in the visible lines, and the
// embedded numbering skips 6800-6801, 6815 and 6825 (leaving the two inner
// `if (match(parser, 'i'))` bodies empty). Presumably the declaration and the
// assignments for the combined rational+imaginary suffix were dropped from
// this listing; confirm against the upstream file.
6798static pm_token_type_t
6799lex_numeric(pm_parser_t *parser) {
6802
6803 if (parser->current.end < parser->end) {
6804 bool seen_e = false;
6805 type = lex_numeric_prefix(parser, &seen_e);
6806
// Remember where the digits end so that a rejected suffix can be undone.
6807 const uint8_t *end = parser->current.end;
6808 pm_token_type_t suffix_type = type;
6809
6810 if (type == PM_TOKEN_INTEGER) {
6811 if (match(parser, 'r')) {
6812 suffix_type = PM_TOKEN_INTEGER_RATIONAL;
6813
6814 if (match(parser, 'i')) {
6816 }
6817 } else if (match(parser, 'i')) {
6818 suffix_type = PM_TOKEN_INTEGER_IMAGINARY;
6819 }
6820 } else {
// For non-integers the `r` suffix is not considered once an exponent has been
// seen.
6821 if (!seen_e && match(parser, 'r')) {
6822 suffix_type = PM_TOKEN_FLOAT_RATIONAL;
6823
6824 if (match(parser, 'i')) {
6826 }
6827 } else if (match(parser, 'i')) {
6828 suffix_type = PM_TOKEN_FLOAT_IMAGINARY;
6829 }
6830 }
6831
// If an identifier-like byte follows, the suffix letters were really the start
// of something else: rewind and keep the plain numeric type.
6832 const uint8_t b = peek(parser);
6833 if (b != '\0' && (b >= 0x80 || ((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')) || b == '_')) {
6834 parser->current.end = end;
6835 } else {
6836 type = suffix_type;
6837 }
6838 }
6839
6840 return type;
6841}
6842
// Lex the name of a global variable; the switch is on the byte at
// parser->current.end. Handles the punctuation globals, back references
// ($&, $`, $', $+), $0, numbered references ($1...), dash globals ($-x), and
// ordinary identifier names. Adds PM_ERR_INVALID_VARIABLE_GLOBAL when the
// name is missing or malformed.
//
// NOTE(review): the embedded numbering skips 6847, 6868, 6890 and 6921, each
// at a point where a branch ends without a visible `return`. As shown, the
// end-of-input branch would fall through into a dereference of
// parser->current.end, and the punctuation cases would fall through into the
// back-reference cases. Presumably the `return` lines were dropped from this
// listing; confirm against the upstream file.
6843static pm_token_type_t
6844lex_global_variable(pm_parser_t *parser) {
6845 if (parser->current.end >= parser->end) {
6846 pm_parser_err_current(parser, PM_ERR_INVALID_VARIABLE_GLOBAL);
6848 }
6849
6850 switch (*parser->current.end) {
6851 case '~': // $~: match-data
6852 case '*': // $*: argv
6853 case '$': // $$: pid
6854 case '?': // $?: last status
6855 case '!': // $!: error string
6856 case '@': // $@: error position
6857 case '/': // $/: input record separator
6858 case '\\': // $\: output record separator
6859 case ';': // $;: field separator
6860 case ',': // $,: output field separator
6861 case '.': // $.: last read line number
6862 case '=': // $=: ignorecase
6863 case ':': // $:: load path
6864 case '<': // $<: reading filename
6865 case '>': // $>: default output handle
6866 case '\"': // $": already loaded files
6867 parser->current.end++;
6869
6870 case '&': // $&: last match
6871 case '`': // $`: string before last match
6872 case '\'': // $': string after last match
6873 case '+': // $+: string matches last paren.
6874 parser->current.end++;
// In FNAME state these are reported as plain global variables rather than as
// back references.
6875 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_BACK_REFERENCE;
6876
6877 case '0': {
6878 parser->current.end++;
6879 size_t width;
6880
// Any identifier characters after $0 are consumed into the token, and then the
// whole token is reported as an error.
6881 if (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0) {
6882 do {
6883 parser->current.end += width;
6884 } while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
6885
6886 // $0 isn't allowed to be followed by anything.
6887 pm_parser_err_current(parser, PM_ERR_INVALID_VARIABLE_GLOBAL);
6888 }
6889
6891 }
6892
6893 case '1':
6894 case '2':
6895 case '3':
6896 case '4':
6897 case '5':
6898 case '6':
6899 case '7':
6900 case '8':
6901 case '9':
6902 parser->current.end += pm_strspn_decimal_digit(parser->current.end, parser->end - parser->current.end);
6903 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_NUMBERED_REFERENCE;
6904
6905 case '-':
// Consume the dash, then lex what follows as an ordinary name.
6906 parser->current.end++;
6907 /* fallthrough */
6908 default: {
6909 size_t width;
6910
6911 if ((width = char_is_identifier(parser, parser->current.end)) > 0) {
6912 do {
6913 parser->current.end += width;
6914 } while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
6915 } else {
6916 // If we get here, then we have a $ followed by something that isn't
6917 // recognized as a global variable.
6918 pm_parser_err_current(parser, PM_ERR_INVALID_VARIABLE_GLOBAL);
6919 }
6920
6922 }
6923 }
6924}
6925
6938static inline pm_token_type_t
6939lex_keyword(pm_parser_t *parser, const uint8_t *current_start, const char *value, size_t vlen, pm_lex_state_t state, pm_token_type_t type, pm_token_type_t modifier_type) {
6940 if (memcmp(current_start, value, vlen) == 0) {
6941 pm_lex_state_t last_state = parser->lex_state;
6942
6943 if (parser->lex_state & PM_LEX_STATE_FNAME) {
6944 lex_state_set(parser, PM_LEX_STATE_ENDFN);
6945 } else {
6946 lex_state_set(parser, state);
6947 if (state == PM_LEX_STATE_BEG) {
6948 parser->command_start = true;
6949 }
6950
6951 if ((modifier_type != PM_TOKEN_EOF) && !(last_state & (PM_LEX_STATE_BEG | PM_LEX_STATE_LABELED | PM_LEX_STATE_CLASS))) {
6952 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
6953 return modifier_type;
6954 }
6955 }
6956
6957 return type;
6958 }
6959
6960 return PM_TOKEN_EOF;
6961}
6962
6963static pm_token_type_t
6964lex_identifier(pm_parser_t *parser, bool previous_command_start) {
6965 // Lex as far as we can into the current identifier.
6966 size_t width;
6967 const uint8_t *end = parser->end;
6968 const uint8_t *current_start = parser->current.start;
6969 const uint8_t *current_end = parser->current.end;
6970 bool encoding_changed = parser->encoding_changed;
6971
6972 if (encoding_changed) {
6973 while (current_end < end && (width = char_is_identifier(parser, current_end)) > 0) {
6974 current_end += width;
6975 }
6976 } else {
6977 while (current_end < end && (width = char_is_identifier_utf8(current_end, end)) > 0) {
6978 current_end += width;
6979 }
6980 }
6981 parser->current.end = current_end;
6982
6983 // Now cache the length of the identifier so that we can quickly compare it
6984 // against known keywords.
6985 width = (size_t) (current_end - current_start);
6986
6987 if (current_end < end) {
6988 if (((current_end + 1 >= end) || (current_end[1] != '=')) && (match(parser, '!') || match(parser, '?'))) {
6989 // First we'll attempt to extend the identifier by a ! or ?. Then we'll
6990 // check if we're returning the defined? keyword or just an identifier.
6991 width++;
6992
6993 if (
6994 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
6995 (peek(parser) == ':') && (peek_offset(parser, 1) != ':')
6996 ) {
6997 // If we're in a position where we can accept a : at the end of an
6998 // identifier, then we'll optionally accept it.
6999 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
7000 (void) match(parser, ':');
7001 return PM_TOKEN_LABEL;
7002 }
7003
7004 if (parser->lex_state != PM_LEX_STATE_DOT) {
7005 if (width == 8 && (lex_keyword(parser, current_start, "defined?", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_DEFINED, PM_TOKEN_EOF) != PM_TOKEN_EOF)) {
7007 }
7008 }
7009
7010 return PM_TOKEN_METHOD_NAME;
7011 }
7012
7013 if (lex_state_p(parser, PM_LEX_STATE_FNAME) && peek_offset(parser, 1) != '~' && peek_offset(parser, 1) != '>' && (peek_offset(parser, 1) != '=' || peek_offset(parser, 2) == '>') && match(parser, '=')) {
7014 // If we're in a position where we can accept a = at the end of an
7015 // identifier, then we'll optionally accept it.
7016 return PM_TOKEN_IDENTIFIER;
7017 }
7018
7019 if (
7020 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
7021 peek(parser) == ':' && peek_offset(parser, 1) != ':'
7022 ) {
7023 // If we're in a position where we can accept a : at the end of an
7024 // identifier, then we'll optionally accept it.
7025 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
7026 (void) match(parser, ':');
7027 return PM_TOKEN_LABEL;
7028 }
7029 }
7030
7031 if (parser->lex_state != PM_LEX_STATE_DOT) {
7033 switch (width) {
7034 case 2:
7035 if (lex_keyword(parser, current_start, "do", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_DO, PM_TOKEN_EOF) != PM_TOKEN_EOF) {
7036 if (pm_do_loop_stack_p(parser)) {
7038 }
7039 return PM_TOKEN_KEYWORD_DO;
7040 }
7041
7042 if ((type = lex_keyword(parser, current_start, "if", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IF, PM_TOKEN_KEYWORD_IF_MODIFIER)) != PM_TOKEN_EOF) return type;
7043 if ((type = lex_keyword(parser, current_start, "in", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7044 if ((type = lex_keyword(parser, current_start, "or", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_OR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7045 break;
7046 case 3:
7047 if ((type = lex_keyword(parser, current_start, "and", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_AND, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7048 if ((type = lex_keyword(parser, current_start, "def", width, PM_LEX_STATE_FNAME, PM_TOKEN_KEYWORD_DEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7049 if ((type = lex_keyword(parser, current_start, "end", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7050 if ((type = lex_keyword(parser, current_start, "END", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7051 if ((type = lex_keyword(parser, current_start, "for", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_FOR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7052 if ((type = lex_keyword(parser, current_start, "nil", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_NIL, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7053 if ((type = lex_keyword(parser, current_start, "not", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_NOT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7054 break;
7055 case 4:
7056 if ((type = lex_keyword(parser, current_start, "case", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_CASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7057 if ((type = lex_keyword(parser, current_start, "else", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7058 if ((type = lex_keyword(parser, current_start, "next", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_NEXT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7059 if ((type = lex_keyword(parser, current_start, "redo", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_REDO, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7060 if ((type = lex_keyword(parser, current_start, "self", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_SELF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7061 if ((type = lex_keyword(parser, current_start, "then", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7062 if ((type = lex_keyword(parser, current_start, "true", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_TRUE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7063 if ((type = lex_keyword(parser, current_start, "when", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7064 break;
7065 case 5:
7066 if ((type = lex_keyword(parser, current_start, "alias", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_ALIAS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7067 if ((type = lex_keyword(parser, current_start, "begin", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_BEGIN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7068 if ((type = lex_keyword(parser, current_start, "BEGIN", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_BEGIN_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7069 if ((type = lex_keyword(parser, current_start, "break", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_BREAK, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7070 if ((type = lex_keyword(parser, current_start, "class", width, PM_LEX_STATE_CLASS, PM_TOKEN_KEYWORD_CLASS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7071 if ((type = lex_keyword(parser, current_start, "elsif", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7072 if ((type = lex_keyword(parser, current_start, "false", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_FALSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7073 if ((type = lex_keyword(parser, current_start, "retry", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_RETRY, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7074 if ((type = lex_keyword(parser, current_start, "super", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_SUPER, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7075 if ((type = lex_keyword(parser, current_start, "undef", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_UNDEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7076 if ((type = lex_keyword(parser, current_start, "until", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNTIL, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) != PM_TOKEN_EOF) return type;
7077 if ((type = lex_keyword(parser, current_start, "while", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHILE, PM_TOKEN_KEYWORD_WHILE_MODIFIER)) != PM_TOKEN_EOF) return type;
7078 if ((type = lex_keyword(parser, current_start, "yield", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_YIELD, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7079 break;
7080 case 6:
7081 if ((type = lex_keyword(parser, current_start, "ensure", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7082 if ((type = lex_keyword(parser, current_start, "module", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_MODULE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7083 if ((type = lex_keyword(parser, current_start, "rescue", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) != PM_TOKEN_EOF) return type;
7084 if ((type = lex_keyword(parser, current_start, "return", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RETURN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7085 if ((type = lex_keyword(parser, current_start, "unless", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNLESS, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) != PM_TOKEN_EOF) return type;
7086 break;
7087 case 8:
7088 if ((type = lex_keyword(parser, current_start, "__LINE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___LINE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7089 if ((type = lex_keyword(parser, current_start, "__FILE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___FILE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7090 break;
7091 case 12:
7092 if ((type = lex_keyword(parser, current_start, "__ENCODING__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___ENCODING__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
7093 break;
7094 }
7095 }
7096
7097 if (encoding_changed) {
7098 return parser->encoding->isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
7099 }
7100 return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
7101}
7102
7107static bool
7108current_token_starts_line(pm_parser_t *parser) {
7109 return (parser->current.start == parser->start) || (parser->current.start[-1] == '\n');
7110}
7111
7126static pm_token_type_t
7127lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
7128 // If there is no content following this #, then we're at the end of
7129 // the string and we can safely return string content.
7130 if (pound + 1 >= parser->end) {
7131 parser->current.end = pound + 1;
7133 }
7134
7135 // Now we'll check against the character the follows the #. If it constitutes
7136 // valid interplation, we'll handle that, otherwise we'll return
7137 // PM_TOKEN_NOT_PROVIDED.
7138 switch (pound[1]) {
7139 case '@': {
7140 // In this case we may have hit an embedded instance or class variable.
7141 if (pound + 2 >= parser->end) {
7142 parser->current.end = pound + 1;
7144 }
7145
7146 // If we're looking at a @ and there's another @, then we'll skip past the
7147 // second @.
7148 const uint8_t *variable = pound + 2;
7149 if (*variable == '@' && pound + 3 < parser->end) variable++;
7150
7151 if (char_is_identifier_start(parser, variable)) {
7152 // At this point we're sure that we've either hit an embedded instance
7153 // or class variable. In this case we'll first need to check if we've
7154 // already consumed content.
7155 if (pound > parser->current.start) {
7156 parser->current.end = pound;
7158 }
7159
7160 // Otherwise we need to return the embedded variable token
7161 // and then switch to the embedded variable lex mode.
7162 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
7163 parser->current.end = pound + 1;
7164 return PM_TOKEN_EMBVAR;
7165 }
7166
7167 // If we didn't get an valid interpolation, then this is just regular
7168 // string content. This is like if we get "#@-". In this case the caller
7169 // should keep lexing.
7170 parser->current.end = pound + 1;
7171 return PM_TOKEN_NOT_PROVIDED;
7172 }
7173 case '$':
7174 // In this case we may have hit an embedded global variable. If there's
7175 // not enough room, then we'll just return string content.
7176 if (pound + 2 >= parser->end) {
7177 parser->current.end = pound + 1;
7179 }
7180
7181 // This is the character that we're going to check to see if it is the
7182 // start of an identifier that would indicate that this is a global
7183 // variable.
7184 const uint8_t *check = pound + 2;
7185
7186 if (pound[2] == '-') {
7187 if (pound + 3 >= parser->end) {
7188 parser->current.end = pound + 2;
7190 }
7191
7192 check++;
7193 }
7194
7195 // If the character that we're going to check is the start of an
7196 // identifier, or we don't have a - and the character is a decimal number
7197 // or a global name punctuation character, then we've hit an embedded
7198 // global variable.
7199 if (
7200 char_is_identifier_start(parser, check) ||
7201 (pound[2] != '-' && (pm_char_is_decimal_digit(pound[2]) || char_is_global_name_punctuation(pound[2])))
7202 ) {
7203 // In this case we've hit an embedded global variable. First check to
7204 // see if we've already consumed content. If we have, then we need to
7205 // return that content as string content first.
7206 if (pound > parser->current.start) {
7207 parser->current.end = pound;
7209 }
7210
7211 // Otherwise, we need to return the embedded variable token and switch
7212 // to the embedded variable lex mode.
7213 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
7214 parser->current.end = pound + 1;
7215 return PM_TOKEN_EMBVAR;
7216 }
7217
7218 // In this case we've hit a #$ that does not indicate a global variable.
7219 // In this case we'll continue lexing past it.
7220 parser->current.end = pound + 1;
7221 return PM_TOKEN_NOT_PROVIDED;
7222 case '{':
7223 // In this case it's the start of an embedded expression. If we have
7224 // already consumed content, then we need to return that content as string
7225 // content first.
7226 if (pound > parser->current.start) {
7227 parser->current.end = pound;
7229 }
7230
7231 parser->enclosure_nesting++;
7232
7233 // Otherwise we'll skip past the #{ and begin lexing the embedded
7234 // expression.
7235 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBEXPR });
7236 parser->current.end = pound + 2;
7237 parser->command_start = true;
7238 pm_do_loop_stack_push(parser, false);
7240 default:
7241 // In this case we've hit a # that doesn't constitute interpolation. We'll
7242 // mark that by returning the not provided token type. This tells the
7243 // consumer to keep lexing forward.
7244 parser->current.end = pound + 1;
7245 return PM_TOKEN_NOT_PROVIDED;
7246 }
7247}
7248
/**
 * Bit flags passed through the escape-reading helpers to describe the context
 * in which an escape sequence is being processed. They may be OR-ed together.
 */

/** No special handling. */
static const uint8_t PM_ESCAPE_FLAG_NONE = 0;

/** A control escape is in effect: escape_byte masks the value with 0x1f. */
static const uint8_t PM_ESCAPE_FLAG_CONTROL = 1u << 0;

/** A meta escape is in effect: escape_byte sets the 0x80 bit of the value. */
static const uint8_t PM_ESCAPE_FLAG_META = 1u << 1;

/** The escape is being read as part of a `?` character literal. */
static const uint8_t PM_ESCAPE_FLAG_SINGLE = 1u << 2;

/** The escape is being read as part of a regular expression. */
static const uint8_t PM_ESCAPE_FLAG_REGEXP = 1u << 3;
7254
/**
 * Lookup table, indexed by ASCII byte value (0x00-0x7F), used by
 * char_is_ascii_printable. Besides the visible characters and the space it
 * also accepts the bytes 0x09-0x0D (tab, line feed, vertical tab, form feed,
 * carriage return). The backslash (0x5C) and DEL (0x7F) are rejected.
 */
static const bool ascii_printable_chars[] = {
    /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
    /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
};

/**
 * Returns true if the given byte is below 0x80 and is marked as acceptable in
 * the ascii_printable_chars table. Bytes with the high bit set are never
 * accepted (and are never used as a table index).
 */
static inline bool
char_is_ascii_printable(const uint8_t b) {
    if (b >= 0x80) {
        return false;
    }

    return ascii_printable_chars[b];
}
7273
/**
 * Convert one ASCII hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F') to its
 * numeric value. The result is only meaningful for those characters; no
 * validation is performed here.
 */
static inline uint8_t
escape_hexadecimal_digit(const uint8_t value) {
    // Decimal digits sort at or below '9' and map directly onto 0-9.
    if (value <= '9') {
        return (uint8_t) (value - '0');
    }

    // For letters the low three bits are 1 for 'a'/'A' through 6 for 'f'/'F',
    // regardless of case, so adding 9 yields 10-15.
    return (uint8_t) ((value & 0x7) + 9);
}
7282
/**
 * Fold `length` hexadecimal digits starting at `string` into a single value,
 * most significant digit first. The bytes are not validated here; each one is
 * passed straight to escape_hexadecimal_digit.
 */
static inline uint32_t
escape_unicode(const uint8_t *string, size_t length) {
    uint32_t codepoint = 0;
    const uint8_t *limit = string + length;

    for (const uint8_t *digit = string; digit < limit; digit++) {
        // Make room for the next nibble and merge it in. Shifting the initial
        // zero is a no-op, so the first digit needs no special case.
        codepoint = (codepoint << 4) | escape_hexadecimal_digit(*digit);
    }

    return codepoint;
}
7297
7301static inline uint8_t
7302escape_byte(uint8_t value, const uint8_t flags) {
7303 if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x1f;
7304 if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
7305 return value;
7306}
7307
7311static inline void
7312escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, const uint8_t *start, const uint8_t *end, uint32_t value) {
7313 // \u escape sequences in string-like structures implicitly change the
7314 // encoding to UTF-8 if they are >= 0x80 or if they are used in a character
7315 // literal.
7316 if (value >= 0x80 || flags & PM_ESCAPE_FLAG_SINGLE) {
7317 if (parser->explicit_encoding != NULL && parser->explicit_encoding != PM_ENCODING_UTF_8_ENTRY) {
7318 PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name);
7319 }
7320
7322 }
7323
7324 if (value <= 0x7F) { // 0xxxxxxx
7325 pm_buffer_append_byte(buffer, (uint8_t) value);
7326 } else if (value <= 0x7FF) { // 110xxxxx 10xxxxxx
7327 pm_buffer_append_byte(buffer, (uint8_t) (0xC0 | (value >> 6)));
7328 pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
7329 } else if (value <= 0xFFFF) { // 1110xxxx 10xxxxxx 10xxxxxx
7330 pm_buffer_append_byte(buffer, (uint8_t) (0xE0 | (value >> 12)));
7331 pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 6) & 0x3F)));
7332 pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
7333 } else if (value <= 0x10FFFF) { // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
7334 pm_buffer_append_byte(buffer, (uint8_t) (0xF0 | (value >> 18)));
7335 pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 12) & 0x3F)));
7336 pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 6) & 0x3F)));
7337 pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
7338 } else {
7339 pm_parser_err(parser, start, end, PM_ERR_ESCAPE_INVALID_UNICODE);
7340 pm_buffer_append_byte(buffer, 0xEF);
7341 pm_buffer_append_byte(buffer, 0xBF);
7342 pm_buffer_append_byte(buffer, 0xBD);
7343 }
7344}
7345
7350static inline void
7351escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte) {
7352 if (byte >= 0x80) {
7353 if (parser->explicit_encoding != NULL && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
7354 PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name);
7355 }
7356
7357 parser->explicit_encoding = parser->encoding;
7358 }
7359
7360 pm_buffer_append_byte(buffer, byte);
7361}
7362
7378static inline void
7379escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags, uint8_t byte) {
7380 if (flags & PM_ESCAPE_FLAG_REGEXP) {
7381 pm_buffer_append_bytes(buffer, (const uint8_t *) "\\x", 2);
7382
7383 uint8_t byte1 = (uint8_t) ((byte >> 4) & 0xF);
7384 uint8_t byte2 = (uint8_t) (byte & 0xF);
7385
7386 if (byte1 >= 0xA) {
7387 pm_buffer_append_byte(buffer, (uint8_t) ((byte1 - 0xA) + 'A'));
7388 } else {
7389 pm_buffer_append_byte(buffer, (uint8_t) (byte1 + '0'));
7390 }
7391
7392 if (byte2 >= 0xA) {
7393 pm_buffer_append_byte(buffer, (uint8_t) (byte2 - 0xA + 'A'));
7394 } else {
7395 pm_buffer_append_byte(buffer, (uint8_t) (byte2 + '0'));
7396 }
7397 } else {
7398 escape_write_byte_encoded(parser, buffer, byte);
7399 }
7400}
7401
7405static void
7406escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
7407 switch (peek(parser)) {
7408 case '\\': {
7409 parser->current.end++;
7410 escape_write_byte_encoded(parser, buffer, escape_byte('\\', flags));
7411 return;
7412 }
7413 case '\'': {
7414 parser->current.end++;
7415 escape_write_byte_encoded(parser, buffer, escape_byte('\'', flags));
7416 return;
7417 }
7418 case 'a': {
7419 parser->current.end++;
7420 escape_write_byte_encoded(parser, buffer, escape_byte('\a', flags));
7421 return;
7422 }
7423 case 'b': {
7424 parser->current.end++;
7425 escape_write_byte_encoded(parser, buffer, escape_byte('\b', flags));
7426 return;
7427 }
7428 case 'e': {
7429 parser->current.end++;
7430 escape_write_byte_encoded(parser, buffer, escape_byte('\033', flags));
7431 return;
7432 }
7433 case 'f': {
7434 parser->current.end++;
7435 escape_write_byte_encoded(parser, buffer, escape_byte('\f', flags));
7436 return;
7437 }
7438 case 'n': {
7439 parser->current.end++;
7440 escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
7441 return;
7442 }
7443 case 'r': {
7444 parser->current.end++;
7445 escape_write_byte_encoded(parser, buffer, escape_byte('\r', flags));
7446 return;
7447 }
7448 case 's': {
7449 parser->current.end++;
7450 escape_write_byte_encoded(parser, buffer, escape_byte(' ', flags));
7451 return;
7452 }
7453 case 't': {
7454 parser->current.end++;
7455 escape_write_byte_encoded(parser, buffer, escape_byte('\t', flags));
7456 return;
7457 }
7458 case 'v': {
7459 parser->current.end++;
7460 escape_write_byte_encoded(parser, buffer, escape_byte('\v', flags));
7461 return;
7462 }
7463 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
7464 uint8_t value = (uint8_t) (*parser->current.end - '0');
7465 parser->current.end++;
7466
7467 if (pm_char_is_octal_digit(peek(parser))) {
7468 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
7469 parser->current.end++;
7470
7471 if (pm_char_is_octal_digit(peek(parser))) {
7472 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
7473 parser->current.end++;
7474 }
7475 }
7476
7477 escape_write_byte_encoded(parser, buffer, value);
7478 return;
7479 }
7480 case 'x': {
7481 const uint8_t *start = parser->current.end - 1;
7482
7483 parser->current.end++;
7484 uint8_t byte = peek(parser);
7485
7486 if (pm_char_is_hexadecimal_digit(byte)) {
7487 uint8_t value = escape_hexadecimal_digit(byte);
7488 parser->current.end++;
7489
7490 byte = peek(parser);
7491 if (pm_char_is_hexadecimal_digit(byte)) {
7492 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(byte));
7493 parser->current.end++;
7494 }
7495
7496 if (flags & PM_ESCAPE_FLAG_REGEXP) {
7497 pm_buffer_append_bytes(buffer, start, (size_t) (parser->current.end - start));
7498 } else {
7499 escape_write_byte_encoded(parser, buffer, value);
7500 }
7501 } else {
7502 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
7503 }
7504
7505 return;
7506 }
7507 case 'u': {
7508 const uint8_t *start = parser->current.end - 1;
7509 parser->current.end++;
7510
7511 if (
7512 (parser->current.end + 4 <= parser->end) &&
7513 pm_char_is_hexadecimal_digit(parser->current.end[0]) &&
7514 pm_char_is_hexadecimal_digit(parser->current.end[1]) &&
7515 pm_char_is_hexadecimal_digit(parser->current.end[2]) &&
7516 pm_char_is_hexadecimal_digit(parser->current.end[3])
7517 ) {
7518 uint32_t value = escape_unicode(parser->current.end, 4);
7519
7520 if (flags & PM_ESCAPE_FLAG_REGEXP) {
7521 pm_buffer_append_bytes(buffer, start, (size_t) (parser->current.end + 4 - start));
7522 } else {
7523 escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
7524 }
7525
7526 parser->current.end += 4;
7527 } else if (peek(parser) == '{') {
7528 const uint8_t *unicode_codepoints_start = parser->current.end - 2;
7529
7530 parser->current.end++;
7531 parser->current.end += pm_strspn_whitespace(parser->current.end, parser->end - parser->current.end);
7532
7533 const uint8_t *extra_codepoints_start = NULL;
7534 int codepoints_count = 0;
7535
7536 while ((parser->current.end < parser->end) && (*parser->current.end != '}')) {
7537 const uint8_t *unicode_start = parser->current.end;
7538 size_t hexadecimal_length = pm_strspn_hexadecimal_digit(parser->current.end, parser->end - parser->current.end);
7539
7540 if (hexadecimal_length > 6) {
7541 // \u{nnnn} character literal allows only 1-6 hexadecimal digits
7542 pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
7543 } else if (hexadecimal_length == 0) {
7544 // there are not hexadecimal characters
7545 pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE);
7546 return;
7547 }
7548
7549 parser->current.end += hexadecimal_length;
7550 codepoints_count++;
7551 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count == 2) {
7552 extra_codepoints_start = unicode_start;
7553 }
7554
7555 if (!(flags & PM_ESCAPE_FLAG_REGEXP)) {
7556 uint32_t value = escape_unicode(unicode_start, hexadecimal_length);
7557 escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
7558 }
7559
7560 parser->current.end += pm_strspn_whitespace(parser->current.end, parser->end - parser->current.end);
7561 }
7562
7563 // ?\u{nnnn} character literal should contain only one codepoint
7564 // and cannot be like ?\u{nnnn mmmm}.
7565 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) {
7566 pm_parser_err(parser, extra_codepoints_start, parser->current.end - 1, PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
7567 }
7568
7569 if (peek(parser) == '}') {
7570 parser->current.end++;
7571 } else {
7572 pm_parser_err(parser, unicode_codepoints_start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
7573 }
7574
7575 if (flags & PM_ESCAPE_FLAG_REGEXP) {
7576 pm_buffer_append_bytes(buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
7577 }
7578 } else {
7579 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
7580 }
7581
7582 return;
7583 }
7584 case 'c': {
7585 parser->current.end++;
7586 if (parser->current.end == parser->end) {
7587 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
7588 return;
7589 }
7590
7591 uint8_t peeked = peek(parser);
7592 switch (peeked) {
7593 case '?': {
7594 parser->current.end++;
7595 escape_write_byte(parser, buffer, flags, escape_byte(0x7f, flags));
7596 return;
7597 }
7598 case '\\':
7599 if (flags & PM_ESCAPE_FLAG_CONTROL) {
7600 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
7601 return;
7602 }
7603 parser->current.end++;
7604 escape_read(parser, buffer, flags | PM_ESCAPE_FLAG_CONTROL);
7605 return;
7606 default: {
7607 if (!char_is_ascii_printable(peeked)) {
7608 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
7609 return;
7610 }
7611
7612 parser->current.end++;
7613 escape_write_byte(parser, buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
7614 return;
7615 }
7616 }
7617 }
7618 case 'C': {
7619 parser->current.end++;
7620 if (peek(parser) != '-') {
7621 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
7622 return;
7623 }
7624
7625 parser->current.end++;
7626 if (parser->current.end == parser->end) {
7627 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
7628 return;
7629 }
7630
7631 uint8_t peeked = peek(parser);
7632 switch (peeked) {
7633 case '?': {
7634 parser->current.end++;
7635 escape_write_byte(parser, buffer, flags, escape_byte(0x7f, flags));
7636 return;
7637 }
7638 case '\\':
7639 if (flags & PM_ESCAPE_FLAG_CONTROL) {
7640 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
7641 return;
7642 }
7643 parser->current.end++;
7644 escape_read(parser, buffer, flags | PM_ESCAPE_FLAG_CONTROL);
7645 return;
7646 default: {
7647 if (!char_is_ascii_printable(peeked)) {
7648 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
7649 return;
7650 }
7651
7652 parser->current.end++;
7653 escape_write_byte(parser, buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
7654 return;
7655 }
7656 }
7657 }
7658 case 'M': {
7659 parser->current.end++;
7660 if (peek(parser) != '-') {
7661 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
7662 return;
7663 }
7664
7665 parser->current.end++;
7666 if (parser->current.end == parser->end) {
7667 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
7668 return;
7669 }
7670
7671 uint8_t peeked = peek(parser);
7672 if (peeked == '\\') {
7673 if (flags & PM_ESCAPE_FLAG_META) {
7674 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
7675 return;
7676 }
7677 parser->current.end++;
7678 escape_read(parser, buffer, flags | PM_ESCAPE_FLAG_META);
7679 return;
7680 }
7681
7682 if (!char_is_ascii_printable(peeked)) {
7683 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
7684 return;
7685 }
7686
7687 parser->current.end++;
7688 escape_write_byte(parser, buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
7689 return;
7690 }
7691 case '\r': {
7692 if (peek_offset(parser, 1) == '\n') {
7693 parser->current.end += 2;
7694 escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
7695 return;
7696 }
7697 }
7698 /* fallthrough */
7699 default: {
7700 if (parser->current.end < parser->end) {
7701 escape_write_byte_encoded(parser, buffer, *parser->current.end++);
7702 }
7703 return;
7704 }
7705 }
7706}
7707
7733static pm_token_type_t
7734lex_question_mark(pm_parser_t *parser) {
7735 if (lex_state_end_p(parser)) {
7736 lex_state_set(parser, PM_LEX_STATE_BEG);
7738 }
7739
7740 if (parser->current.end >= parser->end) {
7741 pm_parser_err_current(parser, PM_ERR_INCOMPLETE_QUESTION_MARK);
7742 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
7744 }
7745
7746 if (pm_char_is_whitespace(*parser->current.end)) {
7747 lex_state_set(parser, PM_LEX_STATE_BEG);
7749 }
7750
7751 lex_state_set(parser, PM_LEX_STATE_BEG);
7752
7753 if (match(parser, '\\')) {
7754 lex_state_set(parser, PM_LEX_STATE_END);
7755
7756 pm_buffer_t buffer;
7757 pm_buffer_init_capacity(&buffer, 3);
7758
7759 escape_read(parser, &buffer, PM_ESCAPE_FLAG_SINGLE);
7760 pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
7761
7763 } else {
7764 size_t encoding_width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
7765
7766 // Ternary operators can have a ? immediately followed by an identifier
7767 // which starts with an underscore. We check for this case here.
7768 if (
7769 !(parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end) || peek(parser) == '_') ||
7770 (
7771 (parser->current.end + encoding_width >= parser->end) ||
7772 !char_is_identifier(parser, parser->current.end + encoding_width)
7773 )
7774 ) {
7775 lex_state_set(parser, PM_LEX_STATE_END);
7776 parser->current.end += encoding_width;
7777 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
7779 }
7780 }
7781
7783}
7784
7789static pm_token_type_t
7790lex_at_variable(pm_parser_t *parser) {
7792 size_t width;
7793
7794 if (parser->current.end < parser->end && (width = char_is_identifier_start(parser, parser->current.end)) > 0) {
7795 parser->current.end += width;
7796
7797 while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0) {
7798 parser->current.end += width;
7799 }
7800 } else if (type == PM_TOKEN_CLASS_VARIABLE) {
7801 pm_parser_err_current(parser, PM_ERR_INCOMPLETE_VARIABLE_CLASS);
7802 } else {
7803 pm_parser_err_current(parser, PM_ERR_INCOMPLETE_VARIABLE_INSTANCE);
7804 }
7805
7806 // If we're lexing an embedded variable, then we need to pop back into the
7807 // parent lex context.
7808 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
7809 lex_mode_pop(parser);
7810 }
7811
7812 return type;
7813}
7814
7818static inline void
7819parser_lex_callback(pm_parser_t *parser) {
7820 if (parser->lex_callback) {
7821 parser->lex_callback->callback(parser->lex_callback->data, parser, &parser->current);
7822 }
7823}
7824
7828static inline pm_comment_t *
7829parser_comment(pm_parser_t *parser, pm_comment_type_t type) {
7830 pm_comment_t *comment = (pm_comment_t *) calloc(sizeof(pm_comment_t), 1);
7831 if (comment == NULL) return NULL;
7832
7833 *comment = (pm_comment_t) {
7834 .type = type,
7835 .location = { parser->current.start, parser->current.end }
7836 };
7837
7838 return comment;
7839}
7840
7846static pm_token_type_t
7847lex_embdoc(pm_parser_t *parser) {
7848 // First, lex out the EMBDOC_BEGIN token.
7849 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
7850
7851 if (newline == NULL) {
7852 parser->current.end = parser->end;
7853 } else {
7854 pm_newline_list_append(&parser->newline_list, newline);
7855 parser->current.end = newline + 1;
7856 }
7857
7858 parser->current.type = PM_TOKEN_EMBDOC_BEGIN;
7859 parser_lex_callback(parser);
7860
7861 // Now, create a comment that is going to be attached to the parser.
7862 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC);
7863 if (comment == NULL) return PM_TOKEN_EOF;
7864
7865 // Now, loop until we find the end of the embedded documentation or the end of
7866 // the file.
7867 while (parser->current.end + 4 <= parser->end) {
7868 parser->current.start = parser->current.end;
7869
7870 // If we've hit the end of the embedded documentation then we'll return that
7871 // token here.
7872 if (memcmp(parser->current.end, "=end", 4) == 0 &&
7873 (parser->current.end + 4 == parser->end || pm_char_is_whitespace(parser->current.end[4]))) {
7874 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
7875
7876 if (newline == NULL) {
7877 parser->current.end = parser->end;
7878 } else {
7879 pm_newline_list_append(&parser->newline_list, newline);
7880 parser->current.end = newline + 1;
7881 }
7882
7883 parser->current.type = PM_TOKEN_EMBDOC_END;
7884 parser_lex_callback(parser);
7885
7886 comment->location.end = parser->current.end;
7887 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
7888
7889 return PM_TOKEN_EMBDOC_END;
7890 }
7891
7892 // Otherwise, we'll parse until the end of the line and return a line of
7893 // embedded documentation.
7894 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
7895
7896 if (newline == NULL) {
7897 parser->current.end = parser->end;
7898 } else {
7899 pm_newline_list_append(&parser->newline_list, newline);
7900 parser->current.end = newline + 1;
7901 }
7902
7903 parser->current.type = PM_TOKEN_EMBDOC_LINE;
7904 parser_lex_callback(parser);
7905 }
7906
7907 pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
7908
7909 comment->location.end = parser->current.end;
7910 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
7911
7912 return PM_TOKEN_EOF;
7913}
7914
7920static inline void
7921parser_lex_ignored_newline(pm_parser_t *parser) {
7922 parser->current.type = PM_TOKEN_IGNORED_NEWLINE;
7923 parser_lex_callback(parser);
7924}
7925
7935static inline void
7936parser_flush_heredoc_end(pm_parser_t *parser) {
7937 assert(parser->heredoc_end <= parser->end);
7938 parser->next_start = parser->heredoc_end;
7939 parser->heredoc_end = NULL;
7940}
7941
7960typedef struct {
7966
7971 const uint8_t *cursor;
7973
7977static inline void
7978pm_token_buffer_push(pm_token_buffer_t *token_buffer, uint8_t byte) {
7979 pm_buffer_append_byte(&token_buffer->buffer, byte);
7980}
7981
7989static inline void
7990pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
7991 pm_string_owned_init(&parser->current_string, (uint8_t *) token_buffer->buffer.value, token_buffer->buffer.length);
7992}
7993
8004static void
8005pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
8006 if (token_buffer->cursor == NULL) {
8007 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
8008 } else {
8009 pm_buffer_append_bytes(&token_buffer->buffer, token_buffer->cursor, (size_t) (parser->current.end - token_buffer->cursor));
8010 pm_token_buffer_copy(parser, token_buffer);
8011 }
8012}
8013
8023static void
8024pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
8025 const uint8_t *start;
8026 if (token_buffer->cursor == NULL) {
8027 pm_buffer_init_capacity(&token_buffer->buffer, 16);
8028 start = parser->current.start;
8029 } else {
8030 start = token_buffer->cursor;
8031 }
8032
8033 const uint8_t *end = parser->current.end - 1;
8034 pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
8035}
8036
8041static inline size_t
8042pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor, pm_heredoc_indent_t indent) {
8043 size_t whitespace = 0;
8044
8045 switch (indent) {
8046 case PM_HEREDOC_INDENT_NONE:
8047 // Do nothing, we can't match a terminator with
8048 // indentation and there's no need to calculate common
8049 // whitespace.
8050 break;
8051 case PM_HEREDOC_INDENT_DASH:
8052 // Skip past inline whitespace.
8053 *cursor += pm_strspn_inline_whitespace(*cursor, parser->end - *cursor);
8054 break;
8055 case PM_HEREDOC_INDENT_TILDE:
8056 // Skip past inline whitespace and calculate common
8057 // whitespace.
8058 while (*cursor < parser->end && pm_char_is_inline_whitespace(**cursor)) {
8059 if (**cursor == '\t') {
8060 whitespace = (whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
8061 } else {
8062 whitespace++;
8063 }
8064 (*cursor)++;
8065 }
8066
8067 break;
8068 }
8069
8070 return whitespace;
8071}
8072
/**
 * Set the type of the current token, report the token to the lex callback,
 * and return from the enclosing (void) function. It relies on a variable named
 * `parser` being in scope at the point of use.
 *
 * The body is wrapped in do { ... } while (0) so that `LEX(type);` behaves as
 * a single statement wherever it is written, and the argument is parenthesized
 * so that any expression can be passed safely.
 */
#define LEX(token_type) do { parser->current.type = (token_type); parser_lex_callback(parser); return; } while (0)
8078
8085static void
8086parser_lex(pm_parser_t *parser) {
8087 assert(parser->current.end <= parser->end);
8088 parser->previous = parser->current;
8089
8090 // This value mirrors cmd_state from CRuby.
8091 bool previous_command_start = parser->command_start;
8092 parser->command_start = false;
8093
8094 // This is used to communicate to the newline lexing function that we've
8095 // already seen a comment.
8096 bool lexed_comment = false;
8097
8098 // Here we cache the current value of the semantic token seen flag. This is
8099 // used to reset it in case we find a token that shouldn't flip this flag.
8100 unsigned int semantic_token_seen = parser->semantic_token_seen;
8101 parser->semantic_token_seen = true;
8102
8103 switch (parser->lex_modes.current->mode) {
8104 case PM_LEX_DEFAULT:
8105 case PM_LEX_EMBEXPR:
8106 case PM_LEX_EMBVAR:
8107
8108 // We have a specific named label here because we are going to jump back to
8109 // this location in the event that we have lexed a token that should not be
8110 // returned to the parser. This includes comments, ignored newlines, and
8111 // invalid tokens of some form.
8112 lex_next_token: {
8113 // If we have the special next_start pointer set, then we're going to jump
8114 // to that location and start lexing from there.
8115 if (parser->next_start != NULL) {
8116 parser->current.end = parser->next_start;
8117 parser->next_start = NULL;
8118 }
8119
8120 // This value mirrors space_seen from CRuby. It tracks whether or not
8121 // space has been eaten before the start of the next token.
8122 bool space_seen = false;
8123
8124 // First, we're going to skip past any whitespace at the front of the next
8125 // token.
8126 bool chomping = true;
8127 while (parser->current.end < parser->end && chomping) {
8128 switch (*parser->current.end) {
8129 case ' ':
8130 case '\t':
8131 case '\f':
8132 case '\v':
8133 parser->current.end++;
8134 space_seen = true;
8135 break;
8136 case '\r':
8137 if (match_eol_offset(parser, 1)) {
8138 chomping = false;
8139 } else {
8140 parser->current.end++;
8141 space_seen = true;
8142 }
8143 break;
8144 case '\\': {
8145 size_t eol_length = match_eol_offset(parser, 1);
8146 if (eol_length) {
8147 if (parser->heredoc_end) {
8148 parser->current.end = parser->heredoc_end;
8149 parser->heredoc_end = NULL;
8150 } else {
8151 parser->current.end += eol_length + 1;
8152 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
8153 space_seen = true;
8154 }
8155 } else if (pm_char_is_inline_whitespace(*parser->current.end)) {
8156 parser->current.end += 2;
8157 } else {
8158 chomping = false;
8159 }
8160
8161 break;
8162 }
8163 default:
8164 chomping = false;
8165 break;
8166 }
8167 }
8168
8169 // Next, we'll set the start of this token to be the current end.
8170 parser->current.start = parser->current.end;
8171
8172 // We'll check if we're at the end of the file. If we are, then we
8173 // need to return the EOF token.
8174 if (parser->current.end >= parser->end) {
8175 LEX(PM_TOKEN_EOF);
8176 }
8177
8178 // Finally, we'll check the current character to determine the next
8179 // token.
8180 switch (*parser->current.end++) {
8181 case '\0': // NUL or end of script
8182 case '\004': // ^D
8183 case '\032': // ^Z
8184 parser->current.end--;
8185 LEX(PM_TOKEN_EOF);
8186
8187 case '#': { // comments
8188 const uint8_t *ending = next_newline(parser->current.end, parser->end - parser->current.end);
8189 parser->current.end = ending == NULL ? parser->end : ending;
8190
8191 // If we found a comment while lexing, then we're going to
8192 // add it to the list of comments in the file and keep
8193 // lexing.
8194 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_INLINE);
8195 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
8196
8197 if (ending) parser->current.end++;
8198 parser->current.type = PM_TOKEN_COMMENT;
8199 parser_lex_callback(parser);
8200
8201 // Here, parse the comment to see if it's a magic comment
8202 // and potentially change state on the parser.
8203 if (!parser_lex_magic_comment(parser, semantic_token_seen) && (parser->current.start == parser->encoding_comment_start)) {
8204 ptrdiff_t length = parser->current.end - parser->current.start;
8205
8206 // If we didn't find a magic comment within the first
8207 // pass and we're at the start of the file, then we need
8208 // to do another pass to potentially find other patterns
8209 // for encoding comments.
8210 if (length >= 10) parser_lex_magic_comment_encoding(parser);
8211 }
8212
8213 lexed_comment = true;
8214 }
8215 /* fallthrough */
8216 case '\r':
8217 case '\n': {
8218 parser->semantic_token_seen = semantic_token_seen & 0x1;
8219 size_t eol_length = match_eol_at(parser, parser->current.end - 1);
8220
8221 if (eol_length) {
8222 // The only way you can have carriage returns in this
8223 // particular loop is if you have a carriage return
8224 // followed by a newline. In that case we'll just skip
8225 // over the carriage return and continue lexing, in
8226 // order to make it so that the newline token
8227 // encapsulates both the carriage return and the
8228 // newline. Note that we need to check that we haven't
8229 // already lexed a comment here because that falls
8230 // through into here as well.
8231 if (!lexed_comment) {
8232 parser->current.end += eol_length - 1; // skip CR
8233 }
8234
8235 if (parser->heredoc_end == NULL) {
8236 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
8237 }
8238 }
8239
8240 if (parser->heredoc_end) {
8241 parser_flush_heredoc_end(parser);
8242 }
8243
8244 // If this is an ignored newline, then we can continue lexing after
8245 // calling the callback with the ignored newline token.
8246 switch (lex_state_ignored_p(parser)) {
8247 case PM_IGNORED_NEWLINE_NONE:
8248 break;
8249 case PM_IGNORED_NEWLINE_PATTERN:
8250 if (parser->pattern_matching_newlines || parser->in_keyword_arg) {
8251 if (!lexed_comment) parser_lex_ignored_newline(parser);
8252 lex_state_set(parser, PM_LEX_STATE_BEG);
8253 parser->command_start = true;
8254 parser->current.type = PM_TOKEN_NEWLINE;
8255 return;
8256 }
8257 /* fallthrough */
8258 case PM_IGNORED_NEWLINE_ALL:
8259 if (!lexed_comment) parser_lex_ignored_newline(parser);
8260 lexed_comment = false;
8261 goto lex_next_token;
8262 }
8263
8264 // Here we need to look ahead and see if there is a call operator
8265 // (either . or &.) that starts the next line. If there is, then this
8266 // is going to become an ignored newline and we're going to instead
8267 // return the call operator.
8268 const uint8_t *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start;
8269 next_content += pm_strspn_inline_whitespace(next_content, parser->end - next_content);
8270
8271 if (next_content < parser->end) {
8272 // If we hit a comment after a newline, then we're going to check
8273 // if it's ignored or if it's followed by a method call ('.').
8274 // If it is, then we're going to call the
8275 // callback with an ignored newline and then continue lexing.
8276 // Otherwise we'll return a regular newline.
8277 if (next_content[0] == '#') {
8278 // Here we look for a "." or "&." following a "\n".
8279 const uint8_t *following = next_newline(next_content, parser->end - next_content);
8280
8281 while (following && (following + 1 < parser->end)) {
8282 following++;
8283 following += pm_strspn_inline_whitespace(following, parser->end - following);
8284
8285 // If this is not followed by a comment, then we can break out
8286 // of this loop.
8287 if (peek_at(parser, following) != '#') break;
8288
8289 // If there is a comment, then we need to find the end of the
8290 // comment and continue searching from there.
8291 following = next_newline(following, parser->end - following);
8292 }
8293
8294 // If the lex state was ignored, or we hit a '.' or a '&.',
8295 // we will lex the ignored newline
8296 if (
8297 lex_state_ignored_p(parser) ||
8298 (following && (
8299 (peek_at(parser, following) == '.') ||
8300 (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
8301 ))
8302 ) {
8303 if (!lexed_comment) parser_lex_ignored_newline(parser);
8304 lexed_comment = false;
8305 goto lex_next_token;
8306 }
8307 }
8308
8309 // If we hit a . after a newline, then we're in a call chain and
8310 // we need to return the call operator.
8311 if (next_content[0] == '.') {
8312 // To match ripper, we need to emit an ignored newline even though
8313 // it's a real newline in the case that we have a beginless range
8314 // on a subsequent line.
8315 if (peek_at(parser, next_content + 1) == '.') {
8316 if (!lexed_comment) parser_lex_ignored_newline(parser);
8317 lex_state_set(parser, PM_LEX_STATE_BEG);
8318 parser->command_start = true;
8319 parser->current.type = PM_TOKEN_NEWLINE;
8320 return;
8321 }
8322
8323 if (!lexed_comment) parser_lex_ignored_newline(parser);
8324 lex_state_set(parser, PM_LEX_STATE_DOT);
8325 parser->current.start = next_content;
8326 parser->current.end = next_content + 1;
8327 parser->next_start = NULL;
8328 LEX(PM_TOKEN_DOT);
8329 }
8330
8331 // If we hit a &. after a newline, then we're in a call chain and
8332 // we need to return the call operator.
8333 if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '.') {
8334 if (!lexed_comment) parser_lex_ignored_newline(parser);
8335 lex_state_set(parser, PM_LEX_STATE_DOT);
8336 parser->current.start = next_content;
8337 parser->current.end = next_content + 2;
8338 parser->next_start = NULL;
8340 }
8341 }
8342
8343 // At this point we know this is a regular newline, and we can set the
8344 // necessary state and return the token.
8345 lex_state_set(parser, PM_LEX_STATE_BEG);
8346 parser->command_start = true;
8347 parser->current.type = PM_TOKEN_NEWLINE;
8348 if (!lexed_comment) parser_lex_callback(parser);
8349 return;
8350 }
8351
8352 // ,
8353 case ',':
8354 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
8355 LEX(PM_TOKEN_COMMA);
8356
8357 // (
8358 case '(': {
8360
8361 if (space_seen && (lex_state_arg_p(parser) || parser->lex_state == (PM_LEX_STATE_END | PM_LEX_STATE_LABEL))) {
8363 }
8364
8365 parser->enclosure_nesting++;
8366 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
8367 pm_do_loop_stack_push(parser, false);
8368 LEX(type);
8369 }
8370
8371 // )
8372 case ')':
8373 parser->enclosure_nesting--;
8374 lex_state_set(parser, PM_LEX_STATE_ENDFN);
8375 pm_do_loop_stack_pop(parser);
8377
8378 // ;
8379 case ';':
8380 lex_state_set(parser, PM_LEX_STATE_BEG);
8381 parser->command_start = true;
8382 LEX(PM_TOKEN_SEMICOLON);
8383
8384 // [ [] []=
8385 case '[':
8386 parser->enclosure_nesting++;
8388
8389 if (lex_state_operator_p(parser)) {
8390 if (match(parser, ']')) {
8391 parser->enclosure_nesting--;
8392 lex_state_set(parser, PM_LEX_STATE_ARG);
8394 }
8395
8396 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABEL);
8397 LEX(type);
8398 }
8399
8400 if (lex_state_beg_p(parser) || (lex_state_arg_p(parser) && (space_seen || lex_state_p(parser, PM_LEX_STATE_LABELED)))) {
8402 }
8403
8404 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
8405 pm_do_loop_stack_push(parser, false);
8406 LEX(type);
8407
8408 // ]
8409 case ']':
8410 parser->enclosure_nesting--;
8411 lex_state_set(parser, PM_LEX_STATE_END);
8412 pm_do_loop_stack_pop(parser);
8414
8415 // {
8416 case '{': {
8418
8419 if (parser->enclosure_nesting == parser->lambda_enclosure_nesting) {
8420 // This { begins a lambda
8421 parser->command_start = true;
8422 lex_state_set(parser, PM_LEX_STATE_BEG);
8424 } else if (lex_state_p(parser, PM_LEX_STATE_LABELED)) {
8425 // This { begins a hash literal
8426 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
8427 } else if (lex_state_p(parser, PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_END | PM_LEX_STATE_ENDFN)) {
8428 // This { begins a block
8429 parser->command_start = true;
8430 lex_state_set(parser, PM_LEX_STATE_BEG);
8431 } else if (lex_state_p(parser, PM_LEX_STATE_ENDARG)) {
8432 // This { begins a block on a command
8433 parser->command_start = true;
8434 lex_state_set(parser, PM_LEX_STATE_BEG);
8435 } else {
8436 // This { begins a hash literal
8437 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
8438 }
8439
8440 parser->enclosure_nesting++;
8441 parser->brace_nesting++;
8442 pm_do_loop_stack_push(parser, false);
8443
8444 LEX(type);
8445 }
8446
8447 // }
8448 case '}':
8449 parser->enclosure_nesting--;
8450 pm_do_loop_stack_pop(parser);
8451
8452 if ((parser->lex_modes.current->mode == PM_LEX_EMBEXPR) && (parser->brace_nesting == 0)) {
8453 lex_mode_pop(parser);
8455 }
8456
8457 parser->brace_nesting--;
8458 lex_state_set(parser, PM_LEX_STATE_END);
8460
8461 // * ** **= *=
8462 case '*': {
8463 if (match(parser, '*')) {
8464 if (match(parser, '=')) {
8465 lex_state_set(parser, PM_LEX_STATE_BEG);
8467 }
8468
8470
8471 if (lex_state_spcarg_p(parser, space_seen) || lex_state_beg_p(parser)) {
8473 }
8474
8475 if (lex_state_operator_p(parser)) {
8476 lex_state_set(parser, PM_LEX_STATE_ARG);
8477 } else {
8478 lex_state_set(parser, PM_LEX_STATE_BEG);
8479 }
8480
8481 LEX(type);
8482 }
8483
8484 if (match(parser, '=')) {
8485 lex_state_set(parser, PM_LEX_STATE_BEG);
8487 }
8488
8490
8491 if (lex_state_spcarg_p(parser, space_seen)) {
8492 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR);
8494 } else if (lex_state_beg_p(parser)) {
8496 }
8497
8498 if (lex_state_operator_p(parser)) {
8499 lex_state_set(parser, PM_LEX_STATE_ARG);
8500 } else {
8501 lex_state_set(parser, PM_LEX_STATE_BEG);
8502 }
8503
8504 LEX(type);
8505 }
8506
8507 // ! != !~ !@
8508 case '!':
8509 if (lex_state_operator_p(parser)) {
8510 lex_state_set(parser, PM_LEX_STATE_ARG);
8511 if (match(parser, '@')) {
8512 LEX(PM_TOKEN_BANG);
8513 }
8514 } else {
8515 lex_state_set(parser, PM_LEX_STATE_BEG);
8516 }
8517
8518 if (match(parser, '=')) {
8520 }
8521
8522 if (match(parser, '~')) {
8524 }
8525
8526 LEX(PM_TOKEN_BANG);
8527
8528 // = => =~ == === =begin
8529 case '=':
8530 if (current_token_starts_line(parser) && (parser->current.end + 5 <= parser->end) && memcmp(parser->current.end, "begin", 5) == 0 && pm_char_is_whitespace(peek_offset(parser, 5))) {
8531 pm_token_type_t type = lex_embdoc(parser);
8532
8533 if (type == PM_TOKEN_EOF) {
8534 LEX(type);
8535 }
8536
8537 goto lex_next_token;
8538 }
8539
8540 if (lex_state_operator_p(parser)) {
8541 lex_state_set(parser, PM_LEX_STATE_ARG);
8542 } else {
8543 lex_state_set(parser, PM_LEX_STATE_BEG);
8544 }
8545
8546 if (match(parser, '>')) {
8548 }
8549
8550 if (match(parser, '~')) {
8552 }
8553
8554 if (match(parser, '=')) {
8555 LEX(match(parser, '=') ? PM_TOKEN_EQUAL_EQUAL_EQUAL : PM_TOKEN_EQUAL_EQUAL);
8556 }
8557
8558 LEX(PM_TOKEN_EQUAL);
8559
8560 // < << <<= <= <=>
8561 case '<':
8562 if (match(parser, '<')) {
8563 if (
8564 !lex_state_p(parser, PM_LEX_STATE_DOT | PM_LEX_STATE_CLASS) &&
8565 !lex_state_end_p(parser) &&
8566 (!lex_state_p(parser, PM_LEX_STATE_ARG_ANY) || lex_state_p(parser, PM_LEX_STATE_LABELED) || space_seen)
8567 ) {
8568 const uint8_t *end = parser->current.end;
8569
8570 pm_heredoc_quote_t quote = PM_HEREDOC_QUOTE_NONE;
8571 pm_heredoc_indent_t indent = PM_HEREDOC_INDENT_NONE;
8572
8573 if (match(parser, '-')) {
8574 indent = PM_HEREDOC_INDENT_DASH;
8575 }
8576 else if (match(parser, '~')) {
8577 indent = PM_HEREDOC_INDENT_TILDE;
8578 }
8579
8580 if (match(parser, '`')) {
8581 quote = PM_HEREDOC_QUOTE_BACKTICK;
8582 }
8583 else if (match(parser, '"')) {
8584 quote = PM_HEREDOC_QUOTE_DOUBLE;
8585 }
8586 else if (match(parser, '\'')) {
8587 quote = PM_HEREDOC_QUOTE_SINGLE;
8588 }
8589
8590 const uint8_t *ident_start = parser->current.end;
8591 size_t width = 0;
8592
8593 if (parser->current.end >= parser->end) {
8594 parser->current.end = end;
8595 } else if (quote == PM_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end)) == 0) {
8596 parser->current.end = end;
8597 } else {
8598 if (quote == PM_HEREDOC_QUOTE_NONE) {
8599 parser->current.end += width;
8600
8601 while ((parser->current.end < parser->end) && (width = char_is_identifier(parser, parser->current.end))) {
8602 parser->current.end += width;
8603 }
8604 } else {
8605 // If we have quotes, then we're going to go until we find the
8606 // end quote.
8607 while ((parser->current.end < parser->end) && quote != (pm_heredoc_quote_t) (*parser->current.end)) {
8608 parser->current.end++;
8609 }
8610 }
8611
8612 size_t ident_length = (size_t) (parser->current.end - ident_start);
8613 if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
8614 // TODO: handle unterminated heredoc
8615 }
8616
8617 parser->explicit_encoding = NULL;
8618 lex_mode_push(parser, (pm_lex_mode_t) {
8619 .mode = PM_LEX_HEREDOC,
8620 .as.heredoc = {
8621 .ident_start = ident_start,
8622 .ident_length = ident_length,
8623 .next_start = parser->current.end,
8624 .quote = quote,
8625 .indent = indent,
8626 .common_whitespace = (size_t) -1
8627 }
8628 });
8629
8630 if (parser->heredoc_end == NULL) {
8631 const uint8_t *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
8632
8633 if (body_start == NULL) {
8634 // If there is no newline after the heredoc identifier, then
8635 // this is not a valid heredoc declaration. In this case we
8636 // will add an error, but we will still return a heredoc
8637 // start.
8638 pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
8639 body_start = parser->end;
8640 } else {
8641 // Otherwise, we want to indicate that the body of the
8642 // heredoc starts on the character after the next newline.
8643 pm_newline_list_append(&parser->newline_list, body_start);
8644 body_start++;
8645 }
8646
8647 parser->next_start = body_start;
8648 } else {
8649 parser->next_start = parser->heredoc_end;
8650 }
8651
8653 }
8654 }
8655
8656 if (match(parser, '=')) {
8657 lex_state_set(parser, PM_LEX_STATE_BEG);
8659 }
8660
8661 if (lex_state_operator_p(parser)) {
8662 lex_state_set(parser, PM_LEX_STATE_ARG);
8663 } else {
8664 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
8665 lex_state_set(parser, PM_LEX_STATE_BEG);
8666 }
8667
8668 LEX(PM_TOKEN_LESS_LESS);
8669 }
8670
8671 if (lex_state_operator_p(parser)) {
8672 lex_state_set(parser, PM_LEX_STATE_ARG);
8673 } else {
8674 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
8675 lex_state_set(parser, PM_LEX_STATE_BEG);
8676 }
8677
8678 if (match(parser, '=')) {
8679 if (match(parser, '>')) {
8681 }
8682
8684 }
8685
8686 LEX(PM_TOKEN_LESS);
8687
8688 // > >> >>= >=
8689 case '>':
8690 if (match(parser, '>')) {
8691 if (lex_state_operator_p(parser)) {
8692 lex_state_set(parser, PM_LEX_STATE_ARG);
8693 } else {
8694 lex_state_set(parser, PM_LEX_STATE_BEG);
8695 }
8696 LEX(match(parser, '=') ? PM_TOKEN_GREATER_GREATER_EQUAL : PM_TOKEN_GREATER_GREATER);
8697 }
8698
8699 if (lex_state_operator_p(parser)) {
8700 lex_state_set(parser, PM_LEX_STATE_ARG);
8701 } else {
8702 lex_state_set(parser, PM_LEX_STATE_BEG);
8703 }
8704
8705 LEX(match(parser, '=') ? PM_TOKEN_GREATER_EQUAL : PM_TOKEN_GREATER);
8706
8707 // double-quoted string literal
8708 case '"': {
8709 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
8710 lex_mode_push_string(parser, true, label_allowed, '\0', '"');
8712 }
8713
8714 // xstring literal
8715 case '`': {
8716 if (lex_state_p(parser, PM_LEX_STATE_FNAME)) {
8717 lex_state_set(parser, PM_LEX_STATE_ENDFN);
8718 LEX(PM_TOKEN_BACKTICK);
8719 }
8720
8721 if (lex_state_p(parser, PM_LEX_STATE_DOT)) {
8722 if (previous_command_start) {
8723 lex_state_set(parser, PM_LEX_STATE_CMDARG);
8724 } else {
8725 lex_state_set(parser, PM_LEX_STATE_ARG);
8726 }
8727
8728 LEX(PM_TOKEN_BACKTICK);
8729 }
8730
8731 lex_mode_push_string(parser, true, false, '\0', '`');
8732 LEX(PM_TOKEN_BACKTICK);
8733 }
8734
8735 // single-quoted string literal
8736 case '\'': {
8737 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
8738 lex_mode_push_string(parser, false, label_allowed, '\0', '\'');
8740 }
8741
8742 // ? character literal
8743 case '?':
8744 LEX(lex_question_mark(parser));
8745
8746 // & && &&= &=
8747 case '&': {
8748 if (match(parser, '&')) {
8749 lex_state_set(parser, PM_LEX_STATE_BEG);
8750
8751 if (match(parser, '=')) {
8753 }
8754
8756 }
8757
8758 if (match(parser, '=')) {
8759 lex_state_set(parser, PM_LEX_STATE_BEG);
8761 }
8762
8763 if (match(parser, '.')) {
8764 lex_state_set(parser, PM_LEX_STATE_DOT);
8766 }
8767
8769 if (lex_state_spcarg_p(parser, space_seen) || lex_state_beg_p(parser)) {
8771 }
8772
8773 if (lex_state_operator_p(parser)) {
8774 lex_state_set(parser, PM_LEX_STATE_ARG);
8775 } else {
8776 lex_state_set(parser, PM_LEX_STATE_BEG);
8777 }
8778
8779 LEX(type);
8780 }
8781
8782 // | || ||= |=
8783 case '|':
8784 if (match(parser, '|')) {
8785 if (match(parser, '=')) {
8786 lex_state_set(parser, PM_LEX_STATE_BEG);
8788 }
8789
8790 if (lex_state_p(parser, PM_LEX_STATE_BEG)) {
8791 parser->current.end--;
8792 LEX(PM_TOKEN_PIPE);
8793 }
8794
8795 lex_state_set(parser, PM_LEX_STATE_BEG);
8796 LEX(PM_TOKEN_PIPE_PIPE);
8797 }
8798
8799 if (match(parser, '=')) {
8800 lex_state_set(parser, PM_LEX_STATE_BEG);
8802 }
8803
8804 if (lex_state_operator_p(parser)) {
8805 lex_state_set(parser, PM_LEX_STATE_ARG);
8806 } else {
8807 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
8808 }
8809
8810 LEX(PM_TOKEN_PIPE);
8811
8812 // + += +@
8813 case '+': {
8814 if (lex_state_operator_p(parser)) {
8815 lex_state_set(parser, PM_LEX_STATE_ARG);
8816
8817 if (match(parser, '@')) {
8818 LEX(PM_TOKEN_UPLUS);
8819 }
8820
8821 LEX(PM_TOKEN_PLUS);
8822 }
8823
8824 if (match(parser, '=')) {
8825 lex_state_set(parser, PM_LEX_STATE_BEG);
8827 }
8828
8829 bool spcarg = lex_state_spcarg_p(parser, space_seen);
8830 if (spcarg) {
8831 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS);
8832 }
8833
8834 if (lex_state_beg_p(parser) || spcarg) {
8835 lex_state_set(parser, PM_LEX_STATE_BEG);
8836
8837 if (pm_char_is_decimal_digit(peek(parser))) {
8838 parser->current.end++;
8839 pm_token_type_t type = lex_numeric(parser);
8840 lex_state_set(parser, PM_LEX_STATE_END);
8841 LEX(type);
8842 }
8843
8844 LEX(PM_TOKEN_UPLUS);
8845 }
8846
8847 lex_state_set(parser, PM_LEX_STATE_BEG);
8848 LEX(PM_TOKEN_PLUS);
8849 }
8850
8851 // - -= -@
8852 case '-': {
8853 if (lex_state_operator_p(parser)) {
8854 lex_state_set(parser, PM_LEX_STATE_ARG);
8855
8856 if (match(parser, '@')) {
8857 LEX(PM_TOKEN_UMINUS);
8858 }
8859
8860 LEX(PM_TOKEN_MINUS);
8861 }
8862
8863 if (match(parser, '=')) {
8864 lex_state_set(parser, PM_LEX_STATE_BEG);
8866 }
8867
8868 if (match(parser, '>')) {
8869 lex_state_set(parser, PM_LEX_STATE_ENDFN);
8871 }
8872
8873 bool spcarg = lex_state_spcarg_p(parser, space_seen);
8874 if (spcarg) {
8875 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
8876 }
8877
8878 if (lex_state_beg_p(parser) || spcarg) {
8879 lex_state_set(parser, PM_LEX_STATE_BEG);
8880 LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
8881 }
8882
8883 lex_state_set(parser, PM_LEX_STATE_BEG);
8884 LEX(PM_TOKEN_MINUS);
8885 }
8886
8887 // . .. ...
8888 case '.': {
8889 bool beg_p = lex_state_beg_p(parser);
8890
8891 if (match(parser, '.')) {
8892 if (match(parser, '.')) {
8893 // If we're _not_ inside a range within default parameters
8894 if (
8895 !context_p(parser, PM_CONTEXT_DEFAULT_PARAMS) &&
8896 context_p(parser, PM_CONTEXT_DEF_PARAMS)
8897 ) {
8898 if (lex_state_p(parser, PM_LEX_STATE_END)) {
8899 lex_state_set(parser, PM_LEX_STATE_BEG);
8900 } else {
8901 lex_state_set(parser, PM_LEX_STATE_ENDARG);
8902 }
8904 }
8905
8906 lex_state_set(parser, PM_LEX_STATE_BEG);
8908 }
8909
8910 lex_state_set(parser, PM_LEX_STATE_BEG);
8911 LEX(beg_p ? PM_TOKEN_UDOT_DOT : PM_TOKEN_DOT_DOT);
8912 }
8913
8914 lex_state_set(parser, PM_LEX_STATE_DOT);
8915 LEX(PM_TOKEN_DOT);
8916 }
8917
8918 // integer
8919 case '0':
8920 case '1':
8921 case '2':
8922 case '3':
8923 case '4':
8924 case '5':
8925 case '6':
8926 case '7':
8927 case '8':
8928 case '9': {
8929 pm_token_type_t type = lex_numeric(parser);
8930 lex_state_set(parser, PM_LEX_STATE_END);
8931 LEX(type);
8932 }
8933
8934 // :: symbol
8935 case ':':
8936 if (match(parser, ':')) {
8937 if (lex_state_beg_p(parser) || lex_state_p(parser, PM_LEX_STATE_CLASS) || (lex_state_p(parser, PM_LEX_STATE_ARG_ANY) && space_seen)) {
8938 lex_state_set(parser, PM_LEX_STATE_BEG);
8940 }
8941
8942 lex_state_set(parser, PM_LEX_STATE_DOT);
8944 }
8945
8946 if (lex_state_end_p(parser) || pm_char_is_whitespace(peek(parser)) || peek(parser) == '#') {
8947 lex_state_set(parser, PM_LEX_STATE_BEG);
8948 LEX(PM_TOKEN_COLON);
8949 }
8950
8951 if (peek(parser) == '"' || peek(parser) == '\'') {
8952 lex_mode_push_string(parser, peek(parser) == '"', false, '\0', *parser->current.end);
8953 parser->current.end++;
8954 }
8955
8956 lex_state_set(parser, PM_LEX_STATE_FNAME);
8958
8959 // / /=
8960 case '/':
8961 if (lex_state_beg_p(parser)) {
8962 lex_mode_push_regexp(parser, '\0', '/');
8964 }
8965
8966 if (match(parser, '=')) {
8967 lex_state_set(parser, PM_LEX_STATE_BEG);
8969 }
8970
8971 if (lex_state_spcarg_p(parser, space_seen)) {
8972 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_SLASH);
8973 lex_mode_push_regexp(parser, '\0', '/');
8975 }
8976
8977 if (lex_state_operator_p(parser)) {
8978 lex_state_set(parser, PM_LEX_STATE_ARG);
8979 } else {
8980 lex_state_set(parser, PM_LEX_STATE_BEG);
8981 }
8982
8983 LEX(PM_TOKEN_SLASH);
8984
8985 // ^ ^=
8986 case '^':
8987 if (lex_state_operator_p(parser)) {
8988 lex_state_set(parser, PM_LEX_STATE_ARG);
8989 } else {
8990 lex_state_set(parser, PM_LEX_STATE_BEG);
8991 }
8992 LEX(match(parser, '=') ? PM_TOKEN_CARET_EQUAL : PM_TOKEN_CARET);
8993
8994 // ~ ~@
8995 case '~':
8996 if (lex_state_operator_p(parser)) {
8997 (void) match(parser, '@');
8998 lex_state_set(parser, PM_LEX_STATE_ARG);
8999 } else {
9000 lex_state_set(parser, PM_LEX_STATE_BEG);
9001 }
9002
9003 LEX(PM_TOKEN_TILDE);
9004
9005 // % %= %i %I %q %Q %w %W
9006 case '%': {
9007 // If there is no subsequent character then we have an
9008 // invalid token. We're going to say it's the percent
9009 // operator because we don't want to move into the string
9010 // lex mode unnecessarily.
9011 if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
9012 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
9013 LEX(PM_TOKEN_PERCENT);
9014 }
9015
9016 if (!lex_state_beg_p(parser) && match(parser, '=')) {
9017 lex_state_set(parser, PM_LEX_STATE_BEG);
9019 } else if (
9020 lex_state_beg_p(parser) ||
9021 (lex_state_p(parser, PM_LEX_STATE_FITEM) && (peek(parser) == 's')) ||
9022 lex_state_spcarg_p(parser, space_seen)
9023 ) {
9024 if (!parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end)) {
9025 if (*parser->current.end >= 0x80) {
9026 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
9027 }
9028
9029 lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9030
9031 size_t eol_length = match_eol(parser);
9032 if (eol_length) {
9033 parser->current.end += eol_length;
9034 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
9035 } else {
9036 parser->current.end++;
9037 }
9038
9039 if (parser->current.end < parser->end) {
9041 }
9042 }
9043
9044 // Delimiters for %-literals cannot be alphanumeric. We
9045 // validate that here.
9046 uint8_t delimiter = peek_offset(parser, 1);
9047 if (delimiter >= 0x80 || parser->encoding->alnum_char(&delimiter, 1)) {
9048 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
9049 goto lex_next_token;
9050 }
9051
9052 switch (peek(parser)) {
9053 case 'i': {
9054 parser->current.end++;
9055
9056 if (parser->current.end < parser->end) {
9057 lex_mode_push_list(parser, false, *parser->current.end++);
9058 } else {
9059 lex_mode_push_list_eof(parser);
9060 }
9061
9063 }
9064 case 'I': {
9065 parser->current.end++;
9066
9067 if (parser->current.end < parser->end) {
9068 lex_mode_push_list(parser, true, *parser->current.end++);
9069 } else {
9070 lex_mode_push_list_eof(parser);
9071 }
9072
9074 }
9075 case 'r': {
9076 parser->current.end++;
9077
9078 if (parser->current.end < parser->end) {
9079 lex_mode_push_regexp(parser, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9080 pm_newline_list_check_append(&parser->newline_list, parser->current.end);
9081 parser->current.end++;
9082 } else {
9083 lex_mode_push_regexp(parser, '\0', '\0');
9084 }
9085
9087 }
9088 case 'q': {
9089 parser->current.end++;
9090
9091 if (parser->current.end < parser->end) {
9092 lex_mode_push_string(parser, false, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9093 pm_newline_list_check_append(&parser->newline_list, parser->current.end);
9094 parser->current.end++;
9095 } else {
9096 lex_mode_push_string_eof(parser);
9097 }
9098
9100 }
9101 case 'Q': {
9102 parser->current.end++;
9103
9104 if (parser->current.end < parser->end) {
9105 lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9106 pm_newline_list_check_append(&parser->newline_list, parser->current.end);
9107 parser->current.end++;
9108 } else {
9109 lex_mode_push_string_eof(parser);
9110 }
9111
9113 }
9114 case 's': {
9115 parser->current.end++;
9116
9117 if (parser->current.end < parser->end) {
9118 lex_mode_push_string(parser, false, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9119 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
9120 parser->current.end++;
9121 } else {
9122 lex_mode_push_string_eof(parser);
9123 }
9124
9126 }
9127 case 'w': {
9128 parser->current.end++;
9129
9130 if (parser->current.end < parser->end) {
9131 lex_mode_push_list(parser, false, *parser->current.end++);
9132 } else {
9133 lex_mode_push_list_eof(parser);
9134 }
9135
9137 }
9138 case 'W': {
9139 parser->current.end++;
9140
9141 if (parser->current.end < parser->end) {
9142 lex_mode_push_list(parser, true, *parser->current.end++);
9143 } else {
9144 lex_mode_push_list_eof(parser);
9145 }
9146
9148 }
9149 case 'x': {
9150 parser->current.end++;
9151
9152 if (parser->current.end < parser->end) {
9153 lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9154 parser->current.end++;
9155 } else {
9156 lex_mode_push_string_eof(parser);
9157 }
9158
9160 }
9161 default:
9162 // If we get to this point, then we have a % that is completely
9163 // unparseable. In this case we'll just drop it from the parser
9164 // and skip past it and hope that the next token is something
9165 // that we can parse.
9166 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
9167 goto lex_next_token;
9168 }
9169 }
9170
9171 lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
9172 LEX(PM_TOKEN_PERCENT);
9173 }
9174
9175 // global variable
9176 case '$': {
9177 pm_token_type_t type = lex_global_variable(parser);
9178
9179 // If we're lexing an embedded variable, then we need to pop back into
9180 // the parent lex context.
9181 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
9182 lex_mode_pop(parser);
9183 }
9184
9185 lex_state_set(parser, PM_LEX_STATE_END);
9186 LEX(type);
9187 }
9188
9189 // instance variable, class variable
9190 case '@':
9191 lex_state_set(parser, parser->lex_state & PM_LEX_STATE_FNAME ? PM_LEX_STATE_ENDFN : PM_LEX_STATE_END);
9192 LEX(lex_at_variable(parser));
9193
9194 default: {
9195 if (*parser->current.start != '_') {
9196 size_t width = char_is_identifier_start(parser, parser->current.start);
9197
9198 // If this isn't the beginning of an identifier, then it's an invalid
9199 // token as we've exhausted all of the other options. We'll skip past
9200 // it and return the next token.
9201 if (!width) {
9202 pm_parser_err_current(parser, PM_ERR_INVALID_TOKEN);
9203 goto lex_next_token;
9204 }
9205
9206 parser->current.end = parser->current.start + width;
9207 }
9208
9209 pm_token_type_t type = lex_identifier(parser, previous_command_start);
9210
9211 // If we've hit a __END__ and it was at the start of the line or the
9212 // start of the file and it is followed by either a \n or a \r\n, then
9213 // this is the last token of the file.
9214 if (
9215 ((parser->current.end - parser->current.start) == 7) &&
9216 current_token_starts_line(parser) &&
9217 (memcmp(parser->current.start, "__END__", 7) == 0) &&
9218 (parser->current.end == parser->end || match_eol(parser))
9219 )
9220 {
9221 // Since we know we're about to add an __END__ comment, we know we
9222 // need to add all of the newlines to get the correct column
9223 // information for it.
9224 const uint8_t *cursor = parser->current.end;
9225 while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) {
9226 pm_newline_list_append(&parser->newline_list, cursor++);
9227 }
9228
9229 parser->current.end = parser->end;
9230 parser->current.type = PM_TOKEN___END__;
9231 parser_lex_callback(parser);
9232
9233 parser->data_loc.start = parser->current.start;
9234 parser->data_loc.end = parser->current.end;
9235
9236 LEX(PM_TOKEN_EOF);
9237 }
9238
9239 pm_lex_state_t last_state = parser->lex_state;
9240
9241 if (type == PM_TOKEN_IDENTIFIER || type == PM_TOKEN_CONSTANT || type == PM_TOKEN_METHOD_NAME) {
9242 if (lex_state_p(parser, PM_LEX_STATE_BEG_ANY | PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_DOT)) {
9243 if (previous_command_start) {
9244 lex_state_set(parser, PM_LEX_STATE_CMDARG);
9245 } else {
9246 lex_state_set(parser, PM_LEX_STATE_ARG);
9247 }
9248 } else if (parser->lex_state == PM_LEX_STATE_FNAME) {
9249 lex_state_set(parser, PM_LEX_STATE_ENDFN);
9250 } else {
9251 lex_state_set(parser, PM_LEX_STATE_END);
9252 }
9253 }
9254
9255 if (
9256 !(last_state & (PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME)) &&
9257 (type == PM_TOKEN_IDENTIFIER) &&
9258 ((pm_parser_local_depth(parser, &parser->current) != -1) ||
9259 pm_token_is_numbered_parameter(parser->current.start, parser->current.end))
9260 ) {
9261 lex_state_set(parser, PM_LEX_STATE_END | PM_LEX_STATE_LABEL);
9262 }
9263
9264 LEX(type);
9265 }
9266 }
9267 }
9268 case PM_LEX_LIST: {
9269 if (parser->next_start != NULL) {
9270 parser->current.end = parser->next_start;
9271 parser->next_start = NULL;
9272 }
9273
9274 // First we'll set the beginning of the token.
9275 parser->current.start = parser->current.end;
9276
9277 // If there's any whitespace at the start of the list, then we're
9278 // going to trim it off the beginning and create a new token.
9279 size_t whitespace;
9280
9281 if (parser->heredoc_end) {
9282 whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
9283 if (peek_offset(parser, (ptrdiff_t)whitespace) == '\n') {
9284 whitespace += 1;
9285 }
9286 } else {
9287 whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list);
9288 }
9289
9290 if (whitespace > 0) {
9291 parser->current.end += whitespace;
9292 if (peek_offset(parser, -1) == '\n') {
9293 // mutates next_start
9294 parser_flush_heredoc_end(parser);
9295 }
9296 LEX(PM_TOKEN_WORDS_SEP);
9297 }
9298
9299 // We'll check if we're at the end of the file. If we are, then we
9300 // need to return the EOF token.
9301 if (parser->current.end >= parser->end) {
9302 LEX(PM_TOKEN_EOF);
9303 }
9304
9305 // Here we'll get a list of the places where strpbrk should break,
9306 // and then find the first one.
9307 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
9308 const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
9309 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9310
9311 // If we haven't found an escape yet, then this buffer will be
9312 // unallocated since we can refer directly to the source string.
9313 pm_token_buffer_t token_buffer = { { 0 }, 0 };
9314
9315 while (breakpoint != NULL) {
9316 // If we hit a null byte, skip directly past it.
9317 if (*breakpoint == '\0') {
9318 breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
9319 continue;
9320 }
9321
9322 // If we hit whitespace, then we must have received content by
9323 // now, so we can return an element of the list.
9324 if (pm_char_is_whitespace(*breakpoint)) {
9325 parser->current.end = breakpoint;
9326 pm_token_buffer_flush(parser, &token_buffer);
9328 }
9329
9330 // If we hit the terminator, we need to check which token to
9331 // return.
9332 if (*breakpoint == lex_mode->as.list.terminator) {
9333 // If this terminator doesn't actually close the list, then
9334 // we need to continue on past it.
9335 if (lex_mode->as.list.nesting > 0) {
9336 parser->current.end = breakpoint + 1;
9337 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9338 lex_mode->as.list.nesting--;
9339 continue;
9340 }
9341
9342 // If we've hit the terminator and we've already skipped
9343 // past content, then we can return a list node.
9344 if (breakpoint > parser->current.start) {
9345 parser->current.end = breakpoint;
9346 pm_token_buffer_flush(parser, &token_buffer);
9348 }
9349
9350 // Otherwise, switch back to the default state and return
9351 // the end of the list.
9352 parser->current.end = breakpoint + 1;
9353 lex_mode_pop(parser);
9354 lex_state_set(parser, PM_LEX_STATE_END);
9356 }
9357
9358 // If we hit escapes, then we need to treat the next token
9359 // literally. In this case we'll skip past the next character
9360 // and find the next breakpoint.
9361 if (*breakpoint == '\\') {
9362 parser->current.end = breakpoint + 1;
9363
9364 // If we've hit the end of the file, then break out of the
9365 // loop by setting the breakpoint to NULL.
9366 if (parser->current.end == parser->end) {
9367 breakpoint = NULL;
9368 continue;
9369 }
9370
9371 pm_token_buffer_escape(parser, &token_buffer);
9372 uint8_t peeked = peek(parser);
9373
9374 switch (peeked) {
9375 case ' ':
9376 case '\f':
9377 case '\t':
9378 case '\v':
9379 case '\\':
9380 pm_token_buffer_push(&token_buffer, peeked);
9381 parser->current.end++;
9382 break;
9383 case '\r':
9384 parser->current.end++;
9385 if (peek(parser) != '\n') {
9386 pm_token_buffer_push(&token_buffer, '\r');
9387 break;
9388 }
9389 /* fallthrough */
9390 case '\n':
9391 pm_token_buffer_push(&token_buffer, '\n');
9392
9393 if (parser->heredoc_end) {
9394 // ... if we are on the same line as a heredoc,
9395 // flush the heredoc and continue parsing after
9396 // heredoc_end.
9397 parser_flush_heredoc_end(parser);
9398 pm_token_buffer_copy(parser, &token_buffer);
9400 } else {
9401 // ... else track the newline.
9402 pm_newline_list_append(&parser->newline_list, parser->current.end);
9403 }
9404
9405 parser->current.end++;
9406 break;
9407 default:
9408 if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
9409 pm_token_buffer_push(&token_buffer, peeked);
9410 parser->current.end++;
9411 } else if (lex_mode->as.list.interpolation) {
9412 escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
9413 } else {
9414 pm_token_buffer_push(&token_buffer, '\\');
9415 pm_token_buffer_push(&token_buffer, peeked);
9416 parser->current.end++;
9417 }
9418
9419 break;
9420 }
9421
9422 token_buffer.cursor = parser->current.end;
9423 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9424 continue;
9425 }
9426
9427 // If we hit a #, then we will attempt to lex interpolation.
9428 if (*breakpoint == '#') {
9429 pm_token_type_t type = lex_interpolation(parser, breakpoint);
9430
9431 if (type == PM_TOKEN_NOT_PROVIDED) {
9432 // If we haven't returned at this point then we had something
9433 // that looked like an interpolated class or instance variable
9434 // like "#@" but wasn't actually. In this case we'll just skip
9435 // to the next breakpoint.
9436 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9437 continue;
9438 }
9439
9440 if (type == PM_TOKEN_STRING_CONTENT) {
9441 pm_token_buffer_flush(parser, &token_buffer);
9442 }
9443
9444 LEX(type);
9445 }
9446
9447 // If we've hit the incrementor, then we need to skip past it
9448 // and find the next breakpoint.
9449 assert(*breakpoint == lex_mode->as.list.incrementor);
9450 parser->current.end = breakpoint + 1;
9451 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9452 lex_mode->as.list.nesting++;
9453 continue;
9454 }
9455
9456 if (parser->current.end > parser->current.start) {
9457 pm_token_buffer_flush(parser, &token_buffer);
9459 }
9460
9461 // If we were unable to find a breakpoint, then this token hits the
9462 // end of the file.
9463 parser->current.end = parser->end;
9464 pm_token_buffer_flush(parser, &token_buffer);
9466 }
9467 case PM_LEX_REGEXP: {
9468 // First, we'll set the start of this token to be the current end.
9469 if (parser->next_start == NULL) {
9470 parser->current.start = parser->current.end;
9471 } else {
9472 parser->current.start = parser->next_start;
9473 parser->current.end = parser->next_start;
9474 parser->next_start = NULL;
9475 }
9476
9477 // We'll check if we're at the end of the file. If we are, then we need to
9478 // return the EOF token.
9479 if (parser->current.end >= parser->end) {
9480 LEX(PM_TOKEN_EOF);
9481 }
9482
9483 // Get a reference to the current mode.
9484 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
9485
9486 // These are the places where we need to split up the content of the
9487 // regular expression. We'll use strpbrk to find the first of these
9488 // characters.
9489 const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
9490 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9491 pm_token_buffer_t token_buffer = { { 0 }, 0 };
9492
9493 while (breakpoint != NULL) {
9494 // If we hit a null byte, skip directly past it.
9495 if (*breakpoint == '\0') {
9496 parser->current.end = breakpoint + 1;
9497 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9498 continue;
9499 }
9500
9501 // If we've hit a newline, then we need to track that in the
9502 // list of newlines.
9503 if (*breakpoint == '\n') {
9504 // For the special case of a newline-terminated regular expression, we will pass
9505 // through this branch twice -- once with PM_TOKEN_REGEXP_BEGIN and then again
9506 // with PM_TOKEN_STRING_CONTENT. Let's avoid tracking the newline twice, by
9507 // tracking it only in the REGEXP_BEGIN case.
9508 if (
9509 !(lex_mode->as.regexp.terminator == '\n' && parser->current.type != PM_TOKEN_REGEXP_BEGIN)
9510 && parser->heredoc_end == NULL
9511 ) {
9512 pm_newline_list_append(&parser->newline_list, breakpoint);
9513 }
9514
9515 if (lex_mode->as.regexp.terminator != '\n') {
9516 // If the terminator is not a newline, then we can set
9517 // the next breakpoint and continue.
9518 parser->current.end = breakpoint + 1;
9519 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9520 continue;
9521 }
9522 }
9523
9524 // If we hit the terminator, we need to determine what kind of
9525 // token to return.
9526 if (*breakpoint == lex_mode->as.regexp.terminator) {
9527 if (lex_mode->as.regexp.nesting > 0) {
9528 parser->current.end = breakpoint + 1;
9529 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9530 lex_mode->as.regexp.nesting--;
9531 continue;
9532 }
9533
9534 // Here we've hit the terminator. If we have already consumed
9535 // content then we need to return that content as string content
9536 // first.
9537 if (breakpoint > parser->current.start) {
9538 parser->current.end = breakpoint;
9539 pm_token_buffer_flush(parser, &token_buffer);
9541 }
9542
9543 // Since we've hit the terminator of the regular expression,
9544 // we now need to parse the options.
9545 parser->current.end = breakpoint + 1;
9546 parser->current.end += pm_strspn_regexp_option(parser->current.end, parser->end - parser->current.end);
9547
9548 lex_mode_pop(parser);
9549 lex_state_set(parser, PM_LEX_STATE_END);
9551 }
9552
9553 // If we hit escapes, then we need to treat the next token
9554 // literally. In this case we'll skip past the next character
9555 // and find the next breakpoint.
9556 if (*breakpoint == '\\') {
9557 parser->current.end = breakpoint + 1;
9558
9559 // If we've hit the end of the file, then break out of the
9560 // loop by setting the breakpoint to NULL.
9561 if (parser->current.end == parser->end) {
9562 breakpoint = NULL;
9563 continue;
9564 }
9565
9566 pm_token_buffer_escape(parser, &token_buffer);
9567 uint8_t peeked = peek(parser);
9568
9569 switch (peeked) {
9570 case '\r':
9571 parser->current.end++;
9572 if (peek(parser) != '\n') {
9573 if (lex_mode->as.regexp.terminator != '\r') {
9574 pm_token_buffer_push(&token_buffer, '\\');
9575 }
9576 pm_token_buffer_push(&token_buffer, '\r');
9577 break;
9578 }
9579 /* fallthrough */
9580 case '\n':
9581 if (parser->heredoc_end) {
9582 // ... if we are on the same line as a heredoc,
9583 // flush the heredoc and continue parsing after
9584 // heredoc_end.
9585 parser_flush_heredoc_end(parser);
9586 pm_token_buffer_copy(parser, &token_buffer);
9588 } else {
9589 // ... else track the newline.
9590 pm_newline_list_append(&parser->newline_list, parser->current.end);
9591 }
9592
9593 parser->current.end++;
9594 break;
9595 case 'c':
9596 case 'C':
9597 case 'M':
9598 case 'u':
9599 case 'x':
9600 escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_REGEXP);
9601 break;
9602 default:
9603 if (lex_mode->as.regexp.terminator == peeked) {
9604 // Some characters when they are used as the
9605 // terminator also receive an escape. They are
9606 // enumerated here.
9607 switch (peeked) {
9608 case '$': case ')': case '*': case '+':
9609 case '.': case '>': case '?': case ']':
9610 case '^': case '|': case '}':
9611 pm_token_buffer_push(&token_buffer, '\\');
9612 break;
9613 default:
9614 break;
9615 }
9616
9617 pm_token_buffer_push(&token_buffer, peeked);
9618 parser->current.end++;
9619 break;
9620 }
9621
9622 if (peeked < 0x80) pm_token_buffer_push(&token_buffer, '\\');
9623 pm_token_buffer_push(&token_buffer, peeked);
9624 parser->current.end++;
9625 break;
9626 }
9627
9628 token_buffer.cursor = parser->current.end;
9629 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9630 continue;
9631 }
9632
9633 // If we hit a #, then we will attempt to lex interpolation.
9634 if (*breakpoint == '#') {
9635 pm_token_type_t type = lex_interpolation(parser, breakpoint);
9636
9637 if (type == PM_TOKEN_NOT_PROVIDED) {
9638 // If we haven't returned at this point then we had
9639 // something that looked like an interpolated class or
9640 // instance variable like "#@" but wasn't actually. In
9641 // this case we'll just skip to the next breakpoint.
9642 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9643 continue;
9644 }
9645
9646 if (type == PM_TOKEN_STRING_CONTENT) {
9647 pm_token_buffer_flush(parser, &token_buffer);
9648 }
9649
9650 LEX(type);
9651 }
9652
9653 // If we've hit the incrementor, then we need to skip past it
9654 // and find the next breakpoint.
9655 assert(*breakpoint == lex_mode->as.regexp.incrementor);
9656 parser->current.end = breakpoint + 1;
9657 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9658 lex_mode->as.regexp.nesting++;
9659 continue;
9660 }
9661
9662 if (parser->current.end > parser->current.start) {
9663 pm_token_buffer_flush(parser, &token_buffer);
9665 }
9666
9667 // If we were unable to find a breakpoint, then this token hits the
9668 // end of the file.
9669 parser->current.end = parser->end;
9670 pm_token_buffer_flush(parser, &token_buffer);
9672 }
9673 case PM_LEX_STRING: {
9674 // First, we'll set the start of this token to be the current end.
9675 if (parser->next_start == NULL) {
9676 parser->current.start = parser->current.end;
9677 } else {
9678 parser->current.start = parser->next_start;
9679 parser->current.end = parser->next_start;
9680 parser->next_start = NULL;
9681 }
9682
9683 // We'll check if we're at the end of the file. If we are, then we need to
9684 // return the EOF token.
9685 if (parser->current.end >= parser->end) {
9686 LEX(PM_TOKEN_EOF);
9687 }
9688
9689 // These are the places where we need to split up the content of the
9690 // string. We'll use strpbrk to find the first of these characters.
9691 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
9692 const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
9693 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9694
9695 // If we haven't found an escape yet, then this buffer will be
9696 // unallocated since we can refer directly to the source string.
9697 pm_token_buffer_t token_buffer = { { 0 }, 0 };
9698
9699 while (breakpoint != NULL) {
9700 // If we hit the incrementor, then we'll increment the nesting and
9701 // continue lexing.
9702 if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
9703 lex_mode->as.string.nesting++;
9704 parser->current.end = breakpoint + 1;
9705 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9706 continue;
9707 }
9708
9709 // Note that we have to check the terminator here first because we could
9710 // potentially be parsing a % string that has a # character as the
9711 // terminator.
9712 if (*breakpoint == lex_mode->as.string.terminator) {
9713 // If this terminator doesn't actually close the string, then we need
9714 // to continue on past it.
9715 if (lex_mode->as.string.nesting > 0) {
9716 parser->current.end = breakpoint + 1;
9717 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9718 lex_mode->as.string.nesting--;
9719 continue;
9720 }
9721
9722 // Here we've hit the terminator. If we have already consumed content
9723 // then we need to return that content as string content first.
9724 if (breakpoint > parser->current.start) {
9725 parser->current.end = breakpoint;
9726 pm_token_buffer_flush(parser, &token_buffer);
9728 }
9729
9730 // Otherwise we need to switch back to the parent lex mode and
9731 // return the end of the string.
9732 size_t eol_length = match_eol_at(parser, breakpoint);
9733 if (eol_length) {
9734 parser->current.end = breakpoint + eol_length;
9735 pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
9736 } else {
9737 parser->current.end = breakpoint + 1;
9738 }
9739
9740 if (lex_mode->as.string.label_allowed && (peek(parser) == ':') && (peek_offset(parser, 1) != ':')) {
9741 parser->current.end++;
9742 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
9743 lex_mode_pop(parser);
9744 LEX(PM_TOKEN_LABEL_END);
9745 }
9746
9747 lex_state_set(parser, PM_LEX_STATE_END);
9748 lex_mode_pop(parser);
9750 }
9751
9752 // When we hit a newline, we need to flush any potential heredocs. Note
9753 // that this has to happen after we check for the terminator in case the
9754 // terminator is a newline character.
9755 if (*breakpoint == '\n') {
9756 if (parser->heredoc_end == NULL) {
9757 pm_newline_list_append(&parser->newline_list, breakpoint);
9758 parser->current.end = breakpoint + 1;
9759 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9760 continue;
9761 } else {
9762 parser->current.end = breakpoint + 1;
9763 parser_flush_heredoc_end(parser);
9764 pm_token_buffer_flush(parser, &token_buffer);
9766 }
9767 }
9768
9769 switch (*breakpoint) {
9770 case '\0':
9771 // Skip directly past the null character.
9772 parser->current.end = breakpoint + 1;
9773 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9774 break;
9775 case '\\': {
9776 // Here we hit escapes.
9777 parser->current.end = breakpoint + 1;
9778
9779 // If we've hit the end of the file, then break out of
9780 // the loop by setting the breakpoint to NULL.
9781 if (parser->current.end == parser->end) {
9782 breakpoint = NULL;
9783 continue;
9784 }
9785
9786 pm_token_buffer_escape(parser, &token_buffer);
9787 uint8_t peeked = peek(parser);
9788
9789 switch (peeked) {
9790 case '\\':
9791 pm_token_buffer_push(&token_buffer, '\\');
9792 parser->current.end++;
9793 break;
9794 case '\r':
9795 parser->current.end++;
9796 if (peek(parser) != '\n') {
9797 if (!lex_mode->as.string.interpolation) {
9798 pm_token_buffer_push(&token_buffer, '\\');
9799 }
9800 pm_token_buffer_push(&token_buffer, '\r');
9801 break;
9802 }
9803 /* fallthrough */
9804 case '\n':
9805 if (!lex_mode->as.string.interpolation) {
9806 pm_token_buffer_push(&token_buffer, '\\');
9807 pm_token_buffer_push(&token_buffer, '\n');
9808 }
9809
9810 if (parser->heredoc_end) {
9811 // ... if we are on the same line as a heredoc,
9812 // flush the heredoc and continue parsing after
9813 // heredoc_end.
9814 parser_flush_heredoc_end(parser);
9815 pm_token_buffer_copy(parser, &token_buffer);
9817 } else {
9818 // ... else track the newline.
9819 pm_newline_list_append(&parser->newline_list, parser->current.end);
9820 }
9821
9822 parser->current.end++;
9823 break;
9824 default:
9825 if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
9826 pm_token_buffer_push(&token_buffer, peeked);
9827 parser->current.end++;
9828 } else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
9829 pm_token_buffer_push(&token_buffer, peeked);
9830 parser->current.end++;
9831 } else if (lex_mode->as.string.interpolation) {
9832 escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
9833 } else {
9834 pm_token_buffer_push(&token_buffer, '\\');
9835 pm_token_buffer_push(&token_buffer, peeked);
9836 parser->current.end++;
9837 }
9838
9839 break;
9840 }
9841
9842 token_buffer.cursor = parser->current.end;
9843 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9844 break;
9845 }
9846 case '#': {
9847 pm_token_type_t type = lex_interpolation(parser, breakpoint);
9848
9849 if (type == PM_TOKEN_NOT_PROVIDED) {
9850 // If we haven't returned at this point then we had something that
9851 // looked like an interpolated class or instance variable like "#@"
9852 // but wasn't actually. In this case we'll just skip to the next
9853 // breakpoint.
9854 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9855 break;
9856 }
9857
9858 if (type == PM_TOKEN_STRING_CONTENT) {
9859 pm_token_buffer_flush(parser, &token_buffer);
9860 }
9861
9862 LEX(type);
9863 }
9864 default:
9865 assert(false && "unreachable");
9866 }
9867 }
9868
9869 if (parser->current.end > parser->current.start) {
9870 pm_token_buffer_flush(parser, &token_buffer);
9872 }
9873
9874 // If we've hit the end of the string, then this is an unterminated
9875 // string. In that case we'll return a string content token.
9876 parser->current.end = parser->end;
9877 pm_token_buffer_flush(parser, &token_buffer);
9879 }
9880 case PM_LEX_HEREDOC: {
9881 // First, we'll set the start of this token.
9882 if (parser->next_start == NULL) {
9883 parser->current.start = parser->current.end;
9884 } else {
9885 parser->current.start = parser->next_start;
9886 parser->current.end = parser->next_start;
9887 parser->heredoc_end = NULL;
9888 parser->next_start = NULL;
9889 }
9890
9891 // We'll check if we're at the end of the file. If we are, then we need to
9892 // return the EOF token.
9893 if (parser->current.end >= parser->end) {
9894 LEX(PM_TOKEN_EOF);
9895 }
9896
9897 // Now let's grab the information about the identifier off of the current
9898 // lex mode.
9899 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
9900 const uint8_t *ident_start = lex_mode->as.heredoc.ident_start;
9901 size_t ident_length = lex_mode->as.heredoc.ident_length;
9902
9903 // If we are immediately following a newline and we have hit the
9904 // terminator, then we need to return the ending of the heredoc.
9905 if (current_token_starts_line(parser)) {
9906 const uint8_t *start = parser->current.start;
9907 if (start + ident_length <= parser->end) {
9908 const uint8_t *newline = next_newline(start, parser->end - start);
9909 const uint8_t *ident_end = newline;
9910 const uint8_t *terminator_end = newline;
9911
9912 if (newline == NULL) {
9913 terminator_end = parser->end;
9914 ident_end = parser->end;
9915 } else {
9916 terminator_end++;
9917 if (newline[-1] == '\r') {
9918 ident_end--; // Remove \r
9919 }
9920 }
9921
9922 const uint8_t *terminator_start = ident_end - ident_length;
9923 const uint8_t *cursor = start;
9924
9925 if (
9926 lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_DASH ||
9927 lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE
9928 ) {
9929 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
9930 cursor++;
9931 }
9932 }
9933
9934 if (
9935 (cursor == terminator_start) &&
9936 (memcmp(terminator_start, ident_start, ident_length) == 0)
9937 ) {
9938 if (newline != NULL) {
9939 pm_newline_list_append(&parser->newline_list, newline);
9940 }
9941
9942 parser->current.end = terminator_end;
9943 if (*lex_mode->as.heredoc.next_start == '\\') {
9944 parser->next_start = NULL;
9945 } else {
9946 parser->next_start = lex_mode->as.heredoc.next_start;
9947 parser->heredoc_end = parser->current.end;
9948 }
9949
9950 lex_state_set(parser, PM_LEX_STATE_END);
9952 }
9953 }
9954
9955 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.indent);
9956 if (
9957 lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE &&
9958 (lex_mode->as.heredoc.common_whitespace > whitespace) &&
9959 peek_at(parser, start) != '\n'
9960 ) {
9961 lex_mode->as.heredoc.common_whitespace = whitespace;
9962 }
9963 }
9964
9965 // Otherwise we'll be parsing string content. These are the places
9966 // where we need to split up the content of the heredoc. We'll use
9967 // strpbrk to find the first of these characters.
9968 uint8_t breakpoints[] = "\n\\#";
9969
9970 pm_heredoc_quote_t quote = lex_mode->as.heredoc.quote;
9971 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
9972 breakpoints[2] = '\0';
9973 }
9974
9975 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9976 pm_token_buffer_t token_buffer = { { 0 }, 0 };
9977 bool was_escaped_newline = false;
9978
9979 while (breakpoint != NULL) {
9980 switch (*breakpoint) {
9981 case '\0':
9982 // Skip directly past the null character.
9983 parser->current.end = breakpoint + 1;
9984 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9985 break;
9986 case '\n': {
9987 if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
9988 parser_flush_heredoc_end(parser);
9989 parser->current.end = breakpoint + 1;
9990 pm_token_buffer_flush(parser, &token_buffer);
9992 }
9993
9994 pm_newline_list_append(&parser->newline_list, breakpoint);
9995
9996 // If we have a - or ~ heredoc, then we can match after
9997 // some leading whitespace.
9998 const uint8_t *start = breakpoint + 1;
9999
10000 if (!was_escaped_newline && (start + ident_length <= parser->end)) {
10001 // We want to match the terminator starting from the end of the line in case
10002 // there is whitespace in the ident such as <<-' DOC' or <<~' DOC'.
10003 const uint8_t *newline = next_newline(start, parser->end - start);
10004
10005 if (newline == NULL) {
10006 newline = parser->end;
10007 } else if (newline[-1] == '\r') {
10008 newline--; // Remove \r
10009 }
10010
10011 // Start of a possible terminator.
10012 const uint8_t *terminator_start = newline - ident_length;
10013
10014 // Cursor to check for the leading whitespace. We skip the
10015 // leading whitespace if we have a - or ~ heredoc.
10016 const uint8_t *cursor = start;
10017
10018 if (lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_DASH ||
10019 lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) {
10020 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
10021 cursor++;
10022 }
10023 }
10024
10025 if (
10026 cursor == terminator_start &&
10027 (memcmp(terminator_start, ident_start, ident_length) == 0)
10028 ) {
10029 parser->current.end = breakpoint + 1;
10030 pm_token_buffer_flush(parser, &token_buffer);
10032 }
10033 }
10034
10035 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.indent);
10036
10037 // If we have hit a newline that is followed by a valid
10038 // terminator, then we need to return the content of the
10039 // heredoc here as string content. Then, the next time a
10040 // token is lexed, it will match again and return the
10041 // end of the heredoc.
10042
10043 if (lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) {
10044 if ((lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
10045 lex_mode->as.heredoc.common_whitespace = whitespace;
10046 }
10047
10048 parser->current.end = breakpoint + 1;
10049
10050 if (!was_escaped_newline) {
10051 pm_token_buffer_flush(parser, &token_buffer);
10053 }
10054 }
10055
10056 // Otherwise we hit a newline and it wasn't followed by
10057 // a terminator, so we can continue parsing.
10058 parser->current.end = breakpoint + 1;
10059 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10060 break;
10061 }
10062 case '\\': {
10063 // If we hit an escape, then we need to skip past
10064 // however many characters the escape takes up. However
10065 // it's important that if \n or \r\n are escaped that we
10066 // stop looping before the newline and not after the
10067 // newline so that we can still potentially find the
10068 // terminator of the heredoc.
10069 parser->current.end = breakpoint + 1;
10070
10071 // If we've hit the end of the file, then break out of
10072 // the loop by setting the breakpoint to NULL.
10073 if (parser->current.end == parser->end) {
10074 breakpoint = NULL;
10075 continue;
10076 }
10077
10078 pm_token_buffer_escape(parser, &token_buffer);
10079 uint8_t peeked = peek(parser);
10080
10081 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
10082 switch (peeked) {
10083 case '\r':
10084 parser->current.end++;
10085 if (peek(parser) != '\n') {
10086 pm_token_buffer_push(&token_buffer, '\\');
10087 pm_token_buffer_push(&token_buffer, '\r');
10088 break;
10089 }
10090 /* fallthrough */
10091 case '\n':
10092 pm_token_buffer_push(&token_buffer, '\\');
10093 pm_token_buffer_push(&token_buffer, '\n');
10094 token_buffer.cursor = parser->current.end + 1;
10095 breakpoint = parser->current.end;
10096 continue;
10097 default:
10098 parser->current.end++;
10099 pm_token_buffer_push(&token_buffer, '\\');
10100 pm_token_buffer_push(&token_buffer, peeked);
10101 break;
10102 }
10103 } else {
10104 switch (peeked) {
10105 case '\r':
10106 parser->current.end++;
10107 if (peek(parser) != '\n') {
10108 pm_token_buffer_push(&token_buffer, '\r');
10109 break;
10110 }
10111 /* fallthrough */
10112 case '\n':
10113 was_escaped_newline = true;
10114 token_buffer.cursor = parser->current.end + 1;
10115 breakpoint = parser->current.end;
10116 continue;
10117 default:
10118 escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
10119 break;
10120 }
10121 }
10122
10123 token_buffer.cursor = parser->current.end;
10124 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10125 break;
10126 }
10127 case '#': {
10128 pm_token_type_t type = lex_interpolation(parser, breakpoint);
10129
10130 if (type == PM_TOKEN_NOT_PROVIDED) {
10131 // If we haven't returned at this point then we had
10132 // something that looked like an interpolated class
10133 // or instance variable like "#@" but wasn't
10134 // actually. In this case we'll just skip to the
10135 // next breakpoint.
10136 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10137 break;
10138 }
10139
10140 if (type == PM_TOKEN_STRING_CONTENT) {
10141 pm_token_buffer_flush(parser, &token_buffer);
10142 }
10143
10144 LEX(type);
10145 }
10146 default:
10147 assert(false && "unreachable");
10148 }
10149
10150 was_escaped_newline = false;
10151 }
10152
10153 if (parser->current.end > parser->current.start) {
10154 parser->current.end = parser->end;
10155 pm_token_buffer_flush(parser, &token_buffer);
10157 }
10158
10159 // If we've hit the end of the string, then this is an unterminated
10160 // heredoc. In that case we'll return a string content token.
10161 parser->current.end = parser->end;
10162 pm_token_buffer_flush(parser, &token_buffer);
10164 }
10165 }
10166
10167 assert(false && "unreachable");
10168}
10169
10170#undef LEX
10171
10172/******************************************************************************/
10173/* Parse functions */
10174/******************************************************************************/
10175
/**
 * Binding powers used when parsing expressions, ordered from the loosest
 * binding (statements) to the tightest (method calls). Levels are spaced two
 * apart; the associativity macros in this file use `level + 1` as a right-hand
 * power, which therefore never equals the next level.
 */
typedef enum {
    // Marks a token that cannot be used as an infix operator.
    PM_BINDING_POWER_UNSET            =  0,
    PM_BINDING_POWER_STATEMENT        =  2,
    // Modifier keywords: if unless until while
    PM_BINDING_POWER_MODIFIER         =  4,
    // Modifier keyword: rescue
    PM_BINDING_POWER_MODIFIER_RESCUE  =  6,
    // Keywords: and or
    PM_BINDING_POWER_COMPOSITION      =  8,
    // Keyword: not
    PM_BINDING_POWER_NOT              = 10,
    // Pattern matching: => in
    PM_BINDING_POWER_MATCH            = 12,
    // defined?
    PM_BINDING_POWER_DEFINED          = 14,
    // Multiple assignment: =
    PM_BINDING_POWER_MULTI_ASSIGNMENT = 16,
    // Assignment: = += -= *= /= %= &= |= ^= &&= ||= <<= >>= **=
    PM_BINDING_POWER_ASSIGNMENT       = 18,
    // Ternary: ?:
    PM_BINDING_POWER_TERNARY          = 20,
    // Ranges: .. ...
    PM_BINDING_POWER_RANGE            = 22,
    // Logical or: ||
    PM_BINDING_POWER_LOGICAL_OR       = 24,
    // Logical and: &&
    PM_BINDING_POWER_LOGICAL_AND      = 26,
    // Equality: <=> == === != =~ !~
    PM_BINDING_POWER_EQUALITY         = 28,
    // Comparison: > >= < <=
    PM_BINDING_POWER_COMPARISON       = 30,
    // Bitwise or / xor: | ^
    PM_BINDING_POWER_BITWISE_OR       = 32,
    // Bitwise and: &
    PM_BINDING_POWER_BITWISE_AND      = 34,
    // Shifts: << >>
    PM_BINDING_POWER_SHIFT            = 36,
    // Additive: + -
    PM_BINDING_POWER_TERM             = 38,
    // Multiplicative: * / %
    PM_BINDING_POWER_FACTOR           = 40,
    // Unary minus: -@
    PM_BINDING_POWER_UMINUS           = 42,
    // Exponentiation: **
    PM_BINDING_POWER_EXPONENT         = 44,
    // Other unary operators: ! ~ +@
    PM_BINDING_POWER_UNARY            = 46,
    // Indexing: [] []=
    PM_BINDING_POWER_INDEX            = 48,
    // Calls and constant paths: :: .
    PM_BINDING_POWER_CALL             = 50,
    PM_BINDING_POWER_MAX              = 52
} pm_binding_power_t;
10213
10218typedef struct {
10220 pm_binding_power_t left;
10221
10223 pm_binding_power_t right;
10224
10227
10234
// Initializer shorthands for entries of the binding powers table. Each one
// expands to a brace-enclosed list of four values: the left power, the right
// power, and two booleans. The third value is false only for the unary form
// and the fourth is true only for the non-associative form.
//
// The `precedence` argument is parenthesized in every expansion so that an
// argument containing an operator that binds more loosely than `+` (for
// example a shift) still yields `(precedence) + 1` for the right power.
#define BINDING_POWER_ASSIGNMENT { PM_BINDING_POWER_UNARY, PM_BINDING_POWER_ASSIGNMENT, true, false }
#define LEFT_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, false }
#define RIGHT_ASSOCIATIVE(precedence) { (precedence), (precedence), true, false }
#define NON_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, true }
#define RIGHT_ASSOCIATIVE_UNARY(precedence) { (precedence), (precedence), false, false }
10240
10241pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
10242 // if unless until while
10243 [PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
10244 [PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
10245 [PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
10246 [PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
10247
10248 // rescue
10249 [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER_RESCUE),
10250
10251 // and or
10252 [PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
10253 [PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
10254
10255 // => in
10256 [PM_TOKEN_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
10257 [PM_TOKEN_KEYWORD_IN] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
10258
10259 // &&= &= ^= = >>= <<= -= %= |= += /= *= **=
10260 [PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
10261 [PM_TOKEN_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
10262 [PM_TOKEN_CARET_EQUAL] = BINDING_POWER_ASSIGNMENT,
10263 [PM_TOKEN_EQUAL] = BINDING_POWER_ASSIGNMENT,
10264 [PM_TOKEN_GREATER_GREATER_EQUAL] = BINDING_POWER_ASSIGNMENT,
10265 [PM_TOKEN_LESS_LESS_EQUAL] = BINDING_POWER_ASSIGNMENT,
10266 [PM_TOKEN_MINUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
10267 [PM_TOKEN_PERCENT_EQUAL] = BINDING_POWER_ASSIGNMENT,
10268 [PM_TOKEN_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
10269 [PM_TOKEN_PIPE_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
10270 [PM_TOKEN_PLUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
10271 [PM_TOKEN_SLASH_EQUAL] = BINDING_POWER_ASSIGNMENT,
10272 [PM_TOKEN_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
10273 [PM_TOKEN_STAR_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
10274
10275 // ?:
10276 [PM_TOKEN_QUESTION_MARK] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_TERNARY),
10277
10278 // .. ...
10279 [PM_TOKEN_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
10280 [PM_TOKEN_DOT_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
10281 [PM_TOKEN_UDOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
10282 [PM_TOKEN_UDOT_DOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
10283
10284 // ||
10285 [PM_TOKEN_PIPE_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_OR),
10286
10287 // &&
10288 [PM_TOKEN_AMPERSAND_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_AND),
10289
10290 // != !~ == === =~ <=>
10291 [PM_TOKEN_BANG_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
10292 [PM_TOKEN_BANG_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
10293 [PM_TOKEN_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
10294 [PM_TOKEN_EQUAL_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
10295 [PM_TOKEN_EQUAL_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
10296 [PM_TOKEN_LESS_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
10297
10298 // > >= < <=
10299 [PM_TOKEN_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
10300 [PM_TOKEN_GREATER_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
10301 [PM_TOKEN_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
10302 [PM_TOKEN_LESS_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
10303
10304 // ^ |
10305 [PM_TOKEN_CARET] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
10306 [PM_TOKEN_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
10307
10308 // &
10309 [PM_TOKEN_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_AND),
10310
10311 // >> <<
10312 [PM_TOKEN_GREATER_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
10313 [PM_TOKEN_LESS_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
10314
10315 // - +
10316 [PM_TOKEN_MINUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
10317 [PM_TOKEN_PLUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
10318
10319 // % / *
10320 [PM_TOKEN_PERCENT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
10321 [PM_TOKEN_SLASH] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
10322 [PM_TOKEN_STAR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
10323 [PM_TOKEN_USTAR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
10324
10325 // -@
10326 [PM_TOKEN_UMINUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UMINUS),
10327 [PM_TOKEN_UMINUS_NUM] = { PM_BINDING_POWER_UMINUS, PM_BINDING_POWER_MAX, false, false },
10328
10329 // **
10330 [PM_TOKEN_STAR_STAR] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_EXPONENT),
10331 [PM_TOKEN_USTAR_STAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
10332
10333 // ! ~ +@
10334 [PM_TOKEN_BANG] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
10335 [PM_TOKEN_TILDE] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
10336 [PM_TOKEN_UPLUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
10337
10338 // [
10339 [PM_TOKEN_BRACKET_LEFT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_INDEX),
10340
10341 // :: . &.
10342 [PM_TOKEN_COLON_COLON] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
10343 [PM_TOKEN_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
10344 [PM_TOKEN_AMPERSAND_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL)
10345};
10346
// The initializer shorthands are only meant for the binding powers table, so
// remove every one of them here. NON_ASSOCIATIVE is included so that it does
// not stay defined for the remainder of the file.
#undef BINDING_POWER_ASSIGNMENT
#undef LEFT_ASSOCIATIVE
#undef RIGHT_ASSOCIATIVE
#undef NON_ASSOCIATIVE
#undef RIGHT_ASSOCIATIVE_UNARY
10351
10355static inline bool
10356match1(const pm_parser_t *parser, pm_token_type_t type) {
10357 return parser->current.type == type;
10358}
10359
10363static inline bool
10364match2(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
10365 return match1(parser, type1) || match1(parser, type2);
10366}
10367
10371static inline bool
10372match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
10373 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3);
10374}
10375
10379static inline bool
10380match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
10381 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
10382}
10383
10387static inline bool
10388match5(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5) {
10389 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5);
10390}
10391
10395static inline bool
10396match6(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6) {
10397 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6);
10398}
10399
10403static inline bool
10404match7(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7) {
10405 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7);
10406}
10407
10411static inline bool
10412match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8) {
10413 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
10414}
10415
10422static bool
10423accept1(pm_parser_t *parser, pm_token_type_t type) {
10424 if (match1(parser, type)) {
10425 parser_lex(parser);
10426 return true;
10427 }
10428 return false;
10429}
10430
10435static inline bool
10436accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
10437 if (match2(parser, type1, type2)) {
10438 parser_lex(parser);
10439 return true;
10440 }
10441 return false;
10442}
10443
10448static inline bool
10449accept3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
10450 if (match3(parser, type1, type2, type3)) {
10451 parser_lex(parser);
10452 return true;
10453 }
10454 return false;
10455}
10456
10468static void
10469expect1(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id) {
10470 if (accept1(parser, type)) return;
10471
10472 const uint8_t *location = parser->previous.end;
10473 pm_parser_err(parser, location, location, diag_id);
10474
10475 parser->previous.start = location;
10476 parser->previous.type = PM_TOKEN_MISSING;
10477}
10478
10483static void
10484expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_diagnostic_id_t diag_id) {
10485 if (accept2(parser, type1, type2)) return;
10486
10487 const uint8_t *location = parser->previous.end;
10488 pm_parser_err(parser, location, location, diag_id);
10489
10490 parser->previous.start = location;
10491 parser->previous.type = PM_TOKEN_MISSING;
10492}
10493
10497static void
10498expect3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_diagnostic_id_t diag_id) {
10499 if (accept3(parser, type1, type2, type3)) return;
10500
10501 const uint8_t *location = parser->previous.end;
10502 pm_parser_err(parser, location, location, diag_id);
10503
10504 parser->previous.start = location;
10505 parser->previous.type = PM_TOKEN_MISSING;
10506}
10507
10508static pm_node_t *
10509parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id);
10510
10514static pm_node_t *
10515parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
10516 pm_node_t *node = parse_expression(parser, binding_power, accepts_command_call, diag_id);
10517 pm_assert_value_expression(parser, node);
10518 return node;
10519}
10520
// Reports whether a token of the given type is treated as being able to begin
// an expression. Apart from the token types listed explicitly, the answer is
// derived from the binding powers table: a token type whose left binding power
// is PM_BINDING_POWER_UNSET is reported as beginning an expression.
//
// NOTE(review): the embedded line numbers skip in several places (for example
// 10542 -> 10545 and 10573 -> 10575), so some `case` labels appear to be
// missing from this listing. As shown, the statements that precede the first
// `case` label cannot be reached. Confirm against the upstream file.
10540static inline bool
10541token_begins_expression_p(pm_token_type_t type) {
10542 switch (type) {
10545 // We need to special case this because it is a binary operator that
10546 // should not be marked as beginning an expression.
10547 return false;
10550 case PM_TOKEN_COLON:
10551 case PM_TOKEN_COMMA:
10553 case PM_TOKEN_EOF:
10564 case PM_TOKEN_NEWLINE:
10566 case PM_TOKEN_SEMICOLON:
10567 // The reason we need this short-circuit is because we're using the
10568 // binding powers table to tell us if the subsequent token could
10569 // potentially be the start of an expression. If there _is_ a binding
10570 // power for one of these tokens, then we should remove it from this list
10571 // and let it be handled by the default case below.
10572 assert(pm_binding_powers[type].left == PM_BINDING_POWER_UNSET);
10573 return false;
10575 // This is a special case because this unary operator cannot appear
10576 // as a general operator, it only appears in certain circumstances.
10577 return false;
10579 case PM_TOKEN_UMINUS:
10581 case PM_TOKEN_UPLUS:
10582 case PM_TOKEN_BANG:
10583 case PM_TOKEN_TILDE:
10584 case PM_TOKEN_UDOT_DOT:
10586 // These unary tokens actually do have binding power associated with them
10587 // so that we can correctly place them into the precedence order. But we
10588 // want them to be marked as beginning an expression, so we need to
10589 // special case them here.
10590 return true;
10591 default:
// Any other token begins an expression exactly when it has no left binding
// power, i.e. when it cannot be used as an infix operator.
10592 return pm_binding_powers[type].left == PM_BINDING_POWER_UNSET;
10593 }
10594}
10595
10600static pm_node_t *
10601parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
10602 if (accept1(parser, PM_TOKEN_USTAR)) {
10603 pm_token_t operator = parser->previous;
10604 pm_node_t *expression = parse_value_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
10605 return (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
10606 }
10607
10608 return parse_value_expression(parser, binding_power, accepts_command_call, diag_id);
10609}
10610
10615static void
10616parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
10617 // The method name needs to change. If we previously had
10618 // foo, we now need foo=. In this case we'll allocate a new
10619 // owned string, copy the previous method name in, and
10620 // append an =.
10621 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *name_field);
10622 size_t length = constant->length;
10623 uint8_t *name = calloc(length + 1, sizeof(uint8_t));
10624 if (name == NULL) return;
10625
10626 memcpy(name, constant->start, length);
10627 name[length] = '=';
10628
10629 // Now switch the name to the new string.
10630 // This silences clang analyzer warning about leak of memory pointed by `name`.
10631 // NOLINTNEXTLINE(clang-analyzer-*)
10632 *name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
10633}
10634
// Converts a node that was parsed as an expression into the node used when it
// appears as the target of a write, and returns that node (which may be the
// same node, a mutated node, or a replacement). A node that cannot be turned
// into a target gets a PM_ERR_WRITE_TARGET_UNEXPECTED error and is returned
// as is.
//
// NOTE(review): the embedded line numbers skip in many places inside the
// switch (for example 10642 -> 10646), so a number of `case` labels and
// statements appear to be missing from this listing. Confirm against the
// upstream file before relying on the branches shown here.
10638static pm_node_t *
10639parse_target(pm_parser_t *parser, pm_node_t *target) {
10640 switch (PM_NODE_TYPE(target)) {
10641 case PM_MISSING_NODE:
10642 return target;
10646 return target;
10650 return target;
// The node is retyped in place, which is only valid because the read and
// target node structs are asserted to have the same size.
10652 assert(sizeof(pm_constant_target_node_t) == sizeof(pm_constant_read_node_t));
10653 target->type = PM_CONSTANT_TARGET_NODE;
10654 return target;
10657 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_READONLY);
10658 return target;
10662 return target;
10664 if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
10665 PM_PARSER_ERR_NODE_FORMAT(parser, target, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start);
10666 } else {
10669 }
10670
10671 return target;
10675 return target;
10677 return target;
10678 case PM_SPLAT_NODE: {
10679 pm_splat_node_t *splat = (pm_splat_node_t *) target;
10680
// A splat may be anonymous; only convert the inner expression if there is one.
10681 if (splat->expression != NULL) {
10682 splat->expression = parse_target(parser, splat->expression);
10683 }
10684
10685 return (pm_node_t *) splat;
10686 }
10687 case PM_CALL_NODE: {
10688 pm_call_node_t *call = (pm_call_node_t *) target;
10689
10690 // If we have no arguments to the call node and we need this to be a
10691 // target then this is either a method call or a local variable write.
// The checks below require a message whose last byte is neither '!' nor '?',
// and no opening location, arguments, or block on the call.
10692 if (
10693 (call->message_loc.start != NULL) &&
10694 (call->message_loc.end[-1] != '!') &&
10695 (call->message_loc.end[-1] != '?') &&
10696 (call->opening_loc.start == NULL) &&
10697 (call->arguments == NULL) &&
10698 (call->block == NULL)
10699 ) {
10700 if (call->receiver == NULL) {
10701 // When we get here, we have a local variable write, because it
10702 // was previously marked as a method call but now we have an =.
10703 // This looks like:
10704 //
10705 // foo = 1
10706 //
10707 // When it was parsed in the prefix position, foo was seen as a
10708 // method call with no receiver and no arguments. Now we have an
10709 // =, so we know it's a local variable write.
// The message location is copied first because the call node is destroyed
// before the replacement node is created.
10710 const pm_location_t message = call->message_loc;
10711
10712 pm_parser_local_add_location(parser, message.start, message.end);
10713 pm_node_destroy(parser, target);
10714
10715 uint32_t depth = 0;
10716 const pm_token_t name = { .type = PM_TOKEN_IDENTIFIER, .start = message.start, .end = message.end };
10717 target = (pm_node_t *) pm_local_variable_read_node_create(parser, &name, depth);
10718
10721
10722 pm_refute_numbered_parameter(parser, message.start, message.end);
10723 return target;
10724 }
10725
// With a receiver, a message that starts with '_' or an alphanumeric character
// becomes a call target whose name has '=' appended by parse_write_name.
10726 if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
10727 parse_write_name(parser, &call->name);
10728 return (pm_node_t *) pm_call_target_node_create(parser, call);
10729 }
10730 }
10731
10732 // If there is no call operator and the message is "[]" then this is
10733 // an aref expression, and we can transform it into an aset
10734 // expression.
10735 if (pm_call_node_index_p(call)) {
10736 return (pm_node_t *) pm_index_target_node_create(parser, call);
10737 }
10738 }
10739 /* fallthrough */
10740 default:
10741 // In this case we have a node that we don't know how to convert
10742 // into a target. We need to treat it as an error. For now, we'll
10743 // mark it as an error and just skip right past it.
10744 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
10745 return target;
10746 }
10747}
10748
10753static pm_node_t *
10754parse_target_validate(pm_parser_t *parser, pm_node_t *target) {
10755 pm_node_t *result = parse_target(parser, target);
10756
10757 // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in parens after the targets.
10758 if (
10759 !match1(parser, PM_TOKEN_EQUAL) &&
10760 !(context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) &&
10761 !(context_p(parser, PM_CONTEXT_PARENS) && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT))
10762 ) {
10763 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
10764 }
10765
10766 return result;
10767}
10768
// Converts the given target node into the node that writes `value` to it using
// `operator`, and returns that node. Depending on the target this creates a
// new write node (destroying the target where the code below does so), or
// rewrites a call node in place into an attribute write or an index write. A
// target that cannot be written to gets a PM_ERR_WRITE_TARGET_UNEXPECTED error
// on the operator token and is returned as is.
//
// NOTE(review): the embedded line numbers skip in many places inside the
// switch (for example 10777 -> 10779), so a number of `case` labels and
// statements appear to be missing from this listing. Confirm against the
// upstream file before relying on the branches shown here.
10772static pm_node_t *
10773parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_node_t *value) {
10774 switch (PM_NODE_TYPE(target)) {
10775 case PM_MISSING_NODE:
// The value is destroyed here and the missing node is returned on its own.
10776 pm_node_destroy(parser, value);
10777 return target;
10779 pm_class_variable_write_node_t *node = pm_class_variable_write_node_create(parser, (pm_class_variable_read_node_t *) target, operator, value);
10780 pm_node_destroy(parser, target);
10781 return (pm_node_t *) node;
10782 }
10784 return (pm_node_t *) pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value);
10785 case PM_CONSTANT_READ_NODE: {
10786 pm_constant_write_node_t *node = pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value);
// The write node is still created and returned when context_def_p reports
// true; the error is only recorded on it.
10787 if (context_def_p(parser)) {
10788 pm_parser_err_node(parser, (pm_node_t *) node, PM_ERR_WRITE_TARGET_IN_METHOD);
10789 }
10790 pm_node_destroy(parser, target);
10791 return (pm_node_t *) node;
10792 }
10795 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_READONLY);
10796 /* fallthrough */
10798 pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
10799 pm_node_destroy(parser, target);
10800 return (pm_node_t *) node;
10801 }
10803 pm_refute_numbered_parameter(parser, target->location.start, target->location.end);
10805
// Everything needed from the read node is copied out before it is destroyed.
10806 pm_constant_id_t constant_id = local_read->name;
10807 uint32_t depth = local_read->depth;
10808
10809 pm_location_t name_loc = target->location;
10810 pm_node_destroy(parser, target);
10811
10812 return (pm_node_t *) pm_local_variable_write_node_create(parser, constant_id, depth, value, &name_loc, operator);
10813 }
10815 pm_node_t *write_node = (pm_node_t *) pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value);
10816 pm_node_destroy(parser, target);
10817 return write_node;
10818 }
10820 return (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) target, operator, value);
10821 case PM_SPLAT_NODE: {
10822 pm_splat_node_t *splat = (pm_splat_node_t *) target;
10823
10824 if (splat->expression != NULL) {
10825 splat->expression = parse_write(parser, splat->expression, operator, value);
10826 }
10827
// A lone splat target is wrapped in a multi target node holding just the splat.
10828 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
10829 pm_multi_target_node_targets_append(parser, multi_target, (pm_node_t *) splat);
10830
10831 return (pm_node_t *) pm_multi_write_node_create(parser, multi_target, operator, value);
10832 }
10833 case PM_CALL_NODE: {
10834 pm_call_node_t *call = (pm_call_node_t *) target;
10835
10836 // If we have no arguments to the call node and we need this to be a
10837 // target then this is either a method call or a local variable
10838 // write.
// The checks below require a message whose last byte is neither '!' nor '?',
// and no opening location, arguments, or block on the call.
10839 if (
10840 (call->message_loc.start != NULL) &&
10841 (call->message_loc.end[-1] != '!') &&
10842 (call->message_loc.end[-1] != '?') &&
10843 (call->opening_loc.start == NULL) &&
10844 (call->arguments == NULL) &&
10845 (call->block == NULL)
10846 ) {
10847 if (call->receiver == NULL) {
10848 // When we get here, we have a local variable write, because it
10849 // was previously marked as a method call but now we have an =.
10850 // This looks like:
10851 //
10852 // foo = 1
10853 //
10854 // When it was parsed in the prefix position, foo was seen as a
10855 // method call with no receiver and no arguments. Now we have an
10856 // =, so we know it's a local variable write.
// The message location is copied first because the call node is destroyed
// before the write node is created.
10857 const pm_location_t message = call->message_loc;
10858
10859 pm_parser_local_add_location(parser, message.start, message.end);
10860 pm_node_destroy(parser, target);
10861
10862 pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, message.start, message.end);
10863 target = (pm_node_t *) pm_local_variable_write_node_create(parser, constant_id, 0, value, &message, operator);
10864
10865 pm_refute_numbered_parameter(parser, message.start, message.end);
10866 return target;
10867 }
10868
10869 if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
10870 // When we get here, we have a method call, because it was
10871 // previously marked as a method call but now we have an =. This
10872 // looks like:
10873 //
10874 // foo.bar = 1
10875 //
10876 // When it was parsed in the prefix position, foo.bar was seen as a
10877 // method call with no arguments. Now we have an =, so we know it's
10878 // a method call with an argument. In this case we will create the
10879 // arguments node, parse the argument, and add it to the list.
10880 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
10881 call->arguments = arguments;
10882
10883 pm_arguments_node_arguments_append(arguments, value);
10884 call->base.location.end = arguments->base.location.end;
10885
// Rename the call (appending '=') and flag it as an attribute write.
10886 parse_write_name(parser, &call->name);
10887 pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE);
10888 return (pm_node_t *) call;
10889 }
10890 }
10891
10892 // If there is no call operator and the message is "[]" then this is
10893 // an aref expression, and we can transform it into an aset
10894 // expression.
10895 if (pm_call_node_index_p(call)) {
10896 if (call->arguments == NULL) {
10897 call->arguments = pm_arguments_node_create(parser);
10898 }
10899
// The written value becomes the last argument and extends the node's location.
10900 pm_arguments_node_arguments_append(call->arguments, value);
10901 target->location.end = value->location.end;
10902
10903 // Replace the name with "[]=".
10904 call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
10905 pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE);
10906 return target;
10907 }
10908
10909 // If there are arguments on the call node, then it can't be a method
10910 // call ending with = or a local variable write, so it must be a
10911 // syntax error. In this case we'll fall through to our default
10912 // handling. We need to free the value that we parsed because there
10913 // is no way for us to attach it to the tree at this point.
10914 pm_node_destroy(parser, value);
10915 }
10916 /* fallthrough */
10917 default:
10918 // In this case we have a node that we don't know how to convert into a
10919 // target. We need to treat it as an error. For now, we'll mark it as an
10920 // error and just skip right past it.
10921 pm_parser_err_token(parser, operator, PM_ERR_WRITE_TARGET_UNEXPECTED);
10922 return target;
10923 }
10924}
10925
10936static pm_node_t *
10937parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power) {
10938 bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
10939
10940 pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
10941 pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target));
10942
10943 while (accept1(parser, PM_TOKEN_COMMA)) {
10944 if (accept1(parser, PM_TOKEN_USTAR)) {
10945 // Here we have a splat operator. It can have a name or be
10946 // anonymous. It can be the final target or be in the middle if
10947 // there haven't been any others yet.
10948 if (has_rest) {
10949 pm_parser_err_previous(parser, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
10950 }
10951
10952 pm_token_t star_operator = parser->previous;
10953 pm_node_t *name = NULL;
10954
10955 if (token_begins_expression_p(parser->current.type)) {
10956 name = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
10957 name = parse_target(parser, name);
10958 }
10959
10960 pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
10961 pm_multi_target_node_targets_append(parser, result, splat);
10962 has_rest = true;
10963 } else if (token_begins_expression_p(parser->current.type)) {
10964 pm_node_t *target = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA);
10965 target = parse_target(parser, target);
10966
10967 pm_multi_target_node_targets_append(parser, result, target);
10968 } else if (!match1(parser, PM_TOKEN_EOF)) {
10969 // If we get here, then we have a trailing , in a multi target node.
10970 // We'll set the implicit rest flag to indicate this.
10971 pm_node_t *rest = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
10972 pm_multi_target_node_targets_append(parser, result, rest);
10973 break;
10974 }
10975 }
10976
10977 return (pm_node_t *) result;
10978}
10979
10984static pm_node_t *
10985parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power) {
10986 pm_node_t *result = parse_targets(parser, first_target, binding_power);
10987
10988 // Ensure that we have either an = or a ) after the targets.
10989 if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
10990 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
10991 }
10992
10993 return result;
10994}
10995
10999static pm_statements_node_t *
11000parse_statements(pm_parser_t *parser, pm_context_t context) {
11001 // First, skip past any optional terminators that might be at the beginning of
11002 // the statements.
11003 while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
11004
11005 // If we have a terminator, then we can just return NULL.
11006 if (context_terminator(context, &parser->current)) return NULL;
11007
11008 pm_statements_node_t *statements = pm_statements_node_create(parser);
11009
11010 // At this point we know we have at least one statement, and that it
11011 // immediately follows the current token.
11012 context_push(parser, context);
11013
11014 while (true) {
11015 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_CANNOT_PARSE_EXPRESSION);
11016 pm_statements_node_body_append(statements, node);
11017
11018 // If we're recovering from a syntax error, then we need to stop parsing the
11019 // statements now.
11020 if (parser->recovering) {
11021 // If this is the level of context where the recovery has happened, then
11022 // we can mark the parser as done recovering.
11023 if (context_terminator(context, &parser->current)) parser->recovering = false;
11024 break;
11025 }
11026
11027 // If we have a terminator, then we will parse all consequtive terminators
11028 // and then continue parsing the statements list.
11029 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
11030 // If we have a terminator, then we will continue parsing the statements
11031 // list.
11032 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
11033 if (context_terminator(context, &parser->current)) break;
11034
11035 // Now we can continue parsing the list of statements.
11036 continue;
11037 }
11038
11039 // At this point we have a list of statements that are not terminated by a
11040 // newline or semicolon. At this point we need to check if we're at the end
11041 // of the statements list. If we are, then we should break out of the loop.
11042 if (context_terminator(context, &parser->current)) break;
11043
11044 // At this point, we have a syntax error, because the statement was not
11045 // terminated by a newline or semicolon, and we're not at the end of the
11046 // statements list. Ideally we should scan forward to determine if we should
11047 // insert a missing terminator or break out of parsing the statements list
11048 // at this point.
11049 //
11050 // We don't have that yet, so instead we'll do a more naive approach. If we
11051 // were unable to parse an expression, then we will skip past this token and
11052 // continue parsing the statements list. Otherwise we'll add an error and
11053 // continue parsing the statements list.
11054 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
11055 parser_lex(parser);
11056
11057 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
11058 if (context_terminator(context, &parser->current)) break;
11059 } else {
11060 expect1(parser, PM_TOKEN_NEWLINE, PM_ERR_EXPECT_EOL_AFTER_STATEMENT);
11061 }
11062 }
11063
11064 context_pop(parser);
11065 return statements;
11066}
11067
11071static bool
11072parse_assocs(pm_parser_t *parser, pm_node_t *node) {
11074 bool contains_keyword_splat = false;
11075
11076 while (true) {
11077 pm_node_t *element;
11078
11079 switch (parser->current.type) {
11080 case PM_TOKEN_USTAR_STAR: {
11081 parser_lex(parser);
11082 pm_token_t operator = parser->previous;
11083 pm_node_t *value = NULL;
11084
11085 if (token_begins_expression_p(parser->current.type)) {
11086 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
11087 } else if (pm_parser_local_depth(parser, &operator) == -1) {
11088 pm_parser_err_token(parser, &operator, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
11089 }
11090
11091 element = (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
11092 contains_keyword_splat = true;
11093 break;
11094 }
11095 case PM_TOKEN_LABEL: {
11096 pm_token_t label = parser->current;
11097 parser_lex(parser);
11098
11099 pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &label);
11100 pm_token_t operator = not_provided(parser);
11101 pm_node_t *value = NULL;
11102
11103 if (token_begins_expression_p(parser->current.type)) {
11104 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_HASH_EXPRESSION_AFTER_LABEL);
11105 } else {
11106 if (parser->encoding->isupper_char(label.start, (label.end - 1) - label.start)) {
11107 pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 };
11108 value = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
11109 } else {
11110 int depth = pm_parser_local_depth(parser, &((pm_token_t) { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 }));
11111 pm_token_t identifier = { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 };
11112
11113 if (depth == -1) {
11114 value = (pm_node_t *) pm_call_node_variable_call_create(parser, &identifier);
11115 } else {
11116 value = (pm_node_t *) pm_local_variable_read_node_create(parser, &identifier, (uint32_t) depth);
11117 }
11118 }
11119
11120 value->location.end++;
11121 value = (pm_node_t *) pm_implicit_node_create(parser, value);
11122 }
11123
11124 element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
11125 break;
11126 }
11127 default: {
11128 pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_HASH_KEY);
11129 pm_token_t operator;
11130
11131 if (pm_symbol_node_label_p(key)) {
11132 operator = not_provided(parser);
11133 } else {
11134 expect1(parser, PM_TOKEN_EQUAL_GREATER, PM_ERR_HASH_ROCKET);
11135 operator = parser->previous;
11136 }
11137
11138 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_HASH_VALUE);
11139 element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
11140 break;
11141 }
11142 }
11143
11144 if (PM_NODE_TYPE_P(node, PM_HASH_NODE)) {
11145 pm_hash_node_elements_append((pm_hash_node_t *) node, element);
11146 } else {
11147 pm_keyword_hash_node_elements_append((pm_keyword_hash_node_t *) node, element);
11148 }
11149
11150 // If there's no comma after the element, then we're done.
11151 if (!accept1(parser, PM_TOKEN_COMMA)) break;
11152
11153 // If the next element starts with a label or a **, then we know we have
11154 // another element in the hash, so we'll continue parsing.
11155 if (match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)) continue;
11156
11157 // Otherwise we need to check if the subsequent token begins an expression.
11158 // If it does, then we'll continue parsing.
11159 if (token_begins_expression_p(parser->current.type)) continue;
11160
11161 // Otherwise by default we will exit out of this loop.
11162 break;
11163 }
11164 return contains_keyword_splat;
11165}
11166
11170static inline void
11171parse_arguments_append(pm_parser_t *parser, pm_arguments_t *arguments, pm_node_t *argument) {
11172 if (arguments->arguments == NULL) {
11173 arguments->arguments = pm_arguments_node_create(parser);
11174 }
11175
11176 pm_arguments_node_arguments_append(arguments->arguments, argument);
11177}
11178
11182static void
11183parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator) {
11184 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
11185
11186 // First we need to check if the next token is one that could be the start of
11187 // an argument. If it's not, then we can just return.
11188 if (
11189 match2(parser, terminator, PM_TOKEN_EOF) ||
11190 (binding_power != PM_BINDING_POWER_UNSET && binding_power < PM_BINDING_POWER_RANGE) ||
11191 context_terminator(parser->current_context->context, &parser->current)
11192 ) {
11193 return;
11194 }
11195
11196 bool parsed_first_argument = false;
11197 bool parsed_bare_hash = false;
11198 bool parsed_block_argument = false;
11199 bool parsed_forwarding_arguments = false;
11200
11201 while (!match1(parser, PM_TOKEN_EOF)) {
11202 if (parsed_block_argument) {
11203 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_BLOCK);
11204 }
11205 if (parsed_forwarding_arguments) {
11206 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES);
11207 }
11208
11209 pm_node_t *argument = NULL;
11210
11211 switch (parser->current.type) {
11213 case PM_TOKEN_LABEL: {
11214 if (parsed_bare_hash) {
11215 pm_parser_err_current(parser, PM_ERR_ARGUMENT_BARE_HASH);
11216 }
11217
11218 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
11219 argument = (pm_node_t *) hash;
11220
11221 bool contains_keyword_splat = parse_assocs(parser, (pm_node_t *) hash);
11222 parsed_bare_hash = true;
11223 parse_arguments_append(parser, arguments, argument);
11224 if (contains_keyword_splat) {
11226 }
11227 break;
11228 }
11229 case PM_TOKEN_UAMPERSAND: {
11230 parser_lex(parser);
11231 pm_token_t operator = parser->previous;
11232 pm_node_t *expression = NULL;
11233
11234 if (token_begins_expression_p(parser->current.type)) {
11235 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_ARGUMENT);
11236 } else {
11237 if (pm_parser_local_depth(parser, &operator) == -1) {
11238 // A block forwarding in a method having `...` parameter (e.g. `def foo(...); bar(&); end`) is available.
11239 pm_constant_id_t ellipsis_id = pm_parser_constant_id_constant(parser, "...", 3);
11240 if (pm_parser_local_depth_constant_id(parser, ellipsis_id) == -1) {
11241 pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_AMP);
11242 }
11243 }
11244 }
11245
11246 argument = (pm_node_t *) pm_block_argument_node_create(parser, &operator, expression);
11247 if (parsed_block_argument) {
11248 parse_arguments_append(parser, arguments, argument);
11249 } else {
11250 arguments->block = argument;
11251 }
11252
11253 parsed_block_argument = true;
11254 break;
11255 }
11256 case PM_TOKEN_USTAR: {
11257 parser_lex(parser);
11258 pm_token_t operator = parser->previous;
11259
11261 if (pm_parser_local_depth(parser, &parser->previous) == -1) {
11262 pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
11263 }
11264
11265 argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
11266 } else {
11267 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT);
11268
11269 if (parsed_bare_hash) {
11270 pm_parser_err(parser, operator.start, expression->location.end, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
11271 }
11272
11273 argument = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
11274 }
11275
11276 parse_arguments_append(parser, arguments, argument);
11277 break;
11278 }
11279 case PM_TOKEN_UDOT_DOT_DOT: {
11280 if (accepts_forwarding) {
11281 parser_lex(parser);
11282
11283 if (token_begins_expression_p(parser->current.type)) {
11284 // If the token begins an expression then this ... was not actually
11285 // argument forwarding but was instead a range.
11286 pm_token_t operator = parser->previous;
11287 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
11288 argument = (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
11289 } else {
11290 if (pm_parser_local_depth(parser, &parser->previous) == -1) {
11291 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
11292 }
11293 if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
11294 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
11295 }
11296
11297 argument = (pm_node_t *) pm_forwarding_arguments_node_create(parser, &parser->previous);
11298 parse_arguments_append(parser, arguments, argument);
11299 parsed_forwarding_arguments = true;
11300 break;
11301 }
11302 }
11303 }
11304 /* fallthrough */
11305 default: {
11306 if (argument == NULL) {
11307 argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, PM_ERR_EXPECT_ARGUMENT);
11308 }
11309
11310 bool contains_keyword_splat = false;
11311 if (pm_symbol_node_label_p(argument) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
11312 if (parsed_bare_hash) {
11313 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
11314 }
11315
11316 pm_token_t operator;
11317 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
11318 operator = parser->previous;
11319 } else {
11320 operator = not_provided(parser);
11321 }
11322
11323 pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
11324
11325 // Finish parsing the one we are part way through
11326 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_HASH_VALUE);
11327
11328 argument = (pm_node_t *) pm_assoc_node_create(parser, argument, &operator, value);
11329 pm_keyword_hash_node_elements_append(bare_hash, argument);
11330 argument = (pm_node_t *) bare_hash;
11331
11332 // Then parse more if we have a comma
11333 if (accept1(parser, PM_TOKEN_COMMA) && (
11334 token_begins_expression_p(parser->current.type) ||
11335 match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
11336 )) {
11337 contains_keyword_splat = parse_assocs(parser, (pm_node_t *) bare_hash);
11338 }
11339
11340 parsed_bare_hash = true;
11341 }
11342
11343 parse_arguments_append(parser, arguments, argument);
11344 if (contains_keyword_splat) {
11346 }
11347 break;
11348 }
11349 }
11350
11351 parsed_first_argument = true;
11352
11353 // If parsing the argument failed, we need to stop parsing arguments.
11354 if (PM_NODE_TYPE_P(argument, PM_MISSING_NODE) || parser->recovering) break;
11355
11356 // If the terminator of these arguments is not EOF, then we have a specific
11357 // token we're looking for. In that case we can accept a newline here
11358 // because it is not functioning as a statement terminator.
11359 if (terminator != PM_TOKEN_EOF) accept1(parser, PM_TOKEN_NEWLINE);
11360
11361 if (parser->previous.type == PM_TOKEN_COMMA && parsed_bare_hash) {
11362 // If we previously were on a comma and we just parsed a bare hash, then
11363 // we want to continue parsing arguments. This is because the comma was
11364 // grabbed up by the hash parser.
11365 } else {
11366 // If there is no comma at the end of the argument list then we're done
11367 // parsing arguments and can break out of this loop.
11368 if (!accept1(parser, PM_TOKEN_COMMA)) break;
11369 }
11370
11371 // If we hit the terminator, then that means we have a trailing comma so we
11372 // can accept that output as well.
11373 if (match1(parser, terminator)) break;
11374 }
11375}
11376
11388parse_required_destructured_parameter(pm_parser_t *parser) {
11389 expect1(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_ERR_EXPECT_LPAREN_REQ_PARAMETER);
11390
11391 pm_multi_target_node_t *node = pm_multi_target_node_create(parser);
11392 pm_multi_target_node_opening_set(node, &parser->previous);
11393
11394 do {
11395 pm_node_t *param;
11396
11397 // If we get here then we have a trailing comma, which isn't allowed in
11398 // the grammar. In other places, multi targets _do_ allow trailing
11399 // commas, so here we'll assume this is a mistake of the user not
11400 // knowing it's not allowed here.
11401 if (node->lefts.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
11402 param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
11403 pm_multi_target_node_targets_append(parser, node, param);
11404 pm_parser_err_current(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
11405 break;
11406 }
11407
11408 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
11409 param = (pm_node_t *) parse_required_destructured_parameter(parser);
11410 } else if (accept1(parser, PM_TOKEN_USTAR)) {
11411 pm_token_t star = parser->previous;
11412 pm_node_t *value = NULL;
11413
11414 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
11415 pm_token_t name = parser->previous;
11416 value = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
11417 pm_parser_parameter_name_check(parser, &name);
11418 pm_parser_local_add_token(parser, &name);
11419 }
11420
11421 param = (pm_node_t *) pm_splat_node_create(parser, &star, value);
11422 } else {
11423 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EXPECT_IDENT_REQ_PARAMETER);
11424 pm_token_t name = parser->previous;
11425
11426 param = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
11427 pm_parser_parameter_name_check(parser, &name);
11428 pm_parser_local_add_token(parser, &name);
11429 }
11430
11431 pm_multi_target_node_targets_append(parser, node, param);
11432 } while (accept1(parser, PM_TOKEN_COMMA));
11433
11434 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN_REQ_PARAMETER);
11435 pm_multi_target_node_closing_set(node, &parser->previous);
11436
11437 return node;
11438}
11439
/**
 * The positions a parameter list can be in while it is being parsed.
 *
 * The numeric order is significant: update_parameter_state compares these
 * values with `<` and `>`, so the values are spelled out explicitly.
 */
typedef enum {
    /* Extra state for tokens that should not change the state. */
    PM_PARAMETERS_NO_CHANGE = 0,
    PM_PARAMETERS_ORDER_NOTHING_AFTER = 1,
    PM_PARAMETERS_ORDER_KEYWORDS_REST = 2,
    PM_PARAMETERS_ORDER_KEYWORDS = 3,
    PM_PARAMETERS_ORDER_REST = 4,
    PM_PARAMETERS_ORDER_AFTER_OPTIONAL = 5,
    PM_PARAMETERS_ORDER_OPTIONAL = 6,
    PM_PARAMETERS_ORDER_NAMED = 7,
    PM_PARAMETERS_ORDER_NONE = 8
} pm_parameters_order_t;
11456
11460static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
11461 [0] = PM_PARAMETERS_NO_CHANGE,
11462 [PM_TOKEN_UAMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
11463 [PM_TOKEN_AMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
11464 [PM_TOKEN_UDOT_DOT_DOT] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
11465 [PM_TOKEN_IDENTIFIER] = PM_PARAMETERS_ORDER_NAMED,
11466 [PM_TOKEN_PARENTHESIS_LEFT] = PM_PARAMETERS_ORDER_NAMED,
11467 [PM_TOKEN_EQUAL] = PM_PARAMETERS_ORDER_OPTIONAL,
11468 [PM_TOKEN_LABEL] = PM_PARAMETERS_ORDER_KEYWORDS,
11469 [PM_TOKEN_USTAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
11470 [PM_TOKEN_STAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
11471 [PM_TOKEN_USTAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST,
11472 [PM_TOKEN_STAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST
11473};
11474
11480static void
11481update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
11482 pm_parameters_order_t state = parameters_ordering[token->type];
11483 if (state == PM_PARAMETERS_NO_CHANGE) return;
11484
11485 // If we see another ordered argument after a optional argument
11486 // we only continue parsing ordered arguments until we stop seeing ordered arguments
11487 if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
11488 *current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
11489 return;
11490 } else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
11491 return;
11492 }
11493
11494 if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
11495 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
11496 }
11497
11498 if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
11499 // We know what transition we failed on, so we can provide a better error here.
11500 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
11501 } else if (state < *current) {
11502 *current = state;
11503 }
11504}
11505
11509static pm_parameters_node_t *
11510parse_parameters(
11511 pm_parser_t *parser,
11512 pm_binding_power_t binding_power,
11513 bool uses_parentheses,
11514 bool allows_trailing_comma,
11515 bool allows_forwarding_parameters
11516) {
11517 pm_parameters_node_t *params = pm_parameters_node_create(parser);
11518 bool looping = true;
11519
11520 pm_do_loop_stack_push(parser, false);
11521 pm_parameters_order_t order = PM_PARAMETERS_ORDER_NONE;
11522
11523 do {
11524 switch (parser->current.type) {
11526 update_parameter_state(parser, &parser->current, &order);
11527 pm_node_t *param = (pm_node_t *) parse_required_destructured_parameter(parser);
11528
11529 if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
11530 pm_parameters_node_requireds_append(params, param);
11531 } else {
11532 pm_parameters_node_posts_append(params, param);
11533 }
11534 break;
11535 }
11537 case PM_TOKEN_AMPERSAND: {
11538 update_parameter_state(parser, &parser->current, &order);
11539 parser_lex(parser);
11540
11541 pm_token_t operator = parser->previous;
11542 pm_token_t name;
11543
11544 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
11545 name = parser->previous;
11546 pm_parser_parameter_name_check(parser, &name);
11547 pm_parser_local_add_token(parser, &name);
11548 } else {
11549 name = not_provided(parser);
11550
11551 if (allows_forwarding_parameters) {
11552 pm_parser_local_add_token(parser, &operator);
11553 }
11554 }
11555
11556 pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator);
11557 if (params->block == NULL) {
11558 pm_parameters_node_block_set(params, param);
11559 } else {
11560 pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_BLOCK_MULTI);
11561 pm_parameters_node_posts_append(params, (pm_node_t *) param);
11562 }
11563
11564 break;
11565 }
11566 case PM_TOKEN_UDOT_DOT_DOT: {
11567 if (!allows_forwarding_parameters) {
11568 pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
11569 }
11570
11571 if (order > PM_PARAMETERS_ORDER_NOTHING_AFTER) {
11572 update_parameter_state(parser, &parser->current, &order);
11573 parser_lex(parser);
11574
11575 if (allows_forwarding_parameters) {
11576 pm_parser_local_add_token(parser, &parser->previous);
11577 }
11578
11579 pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
11580 if (params->keyword_rest != NULL) {
11581 // If we already have a keyword rest parameter, then we replace it with the
11582 // forwarding parameter and move the keyword rest parameter to the posts list.
11583 pm_node_t *keyword_rest = params->keyword_rest;
11584 pm_parameters_node_posts_append(params, keyword_rest);
11585 pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
11586 params->keyword_rest = NULL;
11587 }
11588 pm_parameters_node_keyword_rest_set(params, (pm_node_t *)param);
11589 } else {
11590 update_parameter_state(parser, &parser->current, &order);
11591 parser_lex(parser);
11592 }
11593
11594 break;
11595 }
11598 case PM_TOKEN_CONSTANT:
11601 case PM_TOKEN_METHOD_NAME: {
11602 parser_lex(parser);
11603 switch (parser->previous.type) {
11604 case PM_TOKEN_CONSTANT:
11605 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
11606 break;
11608 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
11609 break;
11611 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
11612 break;
11614 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
11615 break;
11617 pm_parser_err_previous(parser, PM_ERR_PARAMETER_METHOD_NAME);
11618 break;
11619 default: break;
11620 }
11621
11622 if (parser->current.type == PM_TOKEN_EQUAL) {
11623 update_parameter_state(parser, &parser->current, &order);
11624 } else {
11625 update_parameter_state(parser, &parser->previous, &order);
11626 }
11627
11628 pm_token_t name = parser->previous;
11629 pm_parser_parameter_name_check(parser, &name);
11630 pm_parser_local_add_token(parser, &name);
11631
11632 if (accept1(parser, PM_TOKEN_EQUAL)) {
11633 pm_token_t operator = parser->previous;
11634 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
11635 pm_constant_id_t old_param_name = parser->current_param_name;
11636 parser->current_param_name = pm_parser_constant_id_token(parser, &name);
11637 pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT);
11638
11639 pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
11640 pm_parameters_node_optionals_append(params, param);
11641
11642 parser->current_param_name = old_param_name;
11643 context_pop(parser);
11644
11645 // If parsing the value of the parameter resulted in error recovery,
11646 // then we can put a missing node in its place and stop parsing the
11647 // parameters entirely now.
11648 if (parser->recovering) {
11649 looping = false;
11650 break;
11651 }
11652 } else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
11653 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
11654 pm_parameters_node_requireds_append(params, (pm_node_t *) param);
11655 } else {
11656 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
11657 pm_parameters_node_posts_append(params, (pm_node_t *) param);
11658 }
11659
11660 break;
11661 }
11662 case PM_TOKEN_LABEL: {
11663 if (!uses_parentheses) parser->in_keyword_arg = true;
11664 update_parameter_state(parser, &parser->current, &order);
11665 parser_lex(parser);
11666
11667 pm_token_t name = parser->previous;
11668 pm_token_t local = name;
11669 local.end -= 1;
11670
11671 pm_parser_parameter_name_check(parser, &local);
11672 pm_parser_local_add_token(parser, &local);
11673
11674 switch (parser->current.type) {
11675 case PM_TOKEN_COMMA:
11677 case PM_TOKEN_PIPE: {
11678 pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
11679 pm_parameters_node_keywords_append(params, param);
11680 break;
11681 }
11682 case PM_TOKEN_SEMICOLON:
11683 case PM_TOKEN_NEWLINE: {
11684 if (uses_parentheses) {
11685 looping = false;
11686 break;
11687 }
11688
11689 pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
11690 pm_parameters_node_keywords_append(params, param);
11691 break;
11692 }
11693 default: {
11694 pm_node_t *param;
11695
11696 if (token_begins_expression_p(parser->current.type)) {
11697 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
11698 pm_constant_id_t old_param_name = parser->current_param_name;
11699 parser->current_param_name = pm_parser_constant_id_token(parser, &local);
11700 pm_node_t *value = parse_value_expression(parser, binding_power, false, PM_ERR_PARAMETER_NO_DEFAULT_KW);
11701 parser->current_param_name = old_param_name;
11702 context_pop(parser);
11703 param = (pm_node_t *) pm_optional_keyword_parameter_node_create(parser, &name, value);
11704 }
11705 else {
11706 param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
11707 }
11708
11709 pm_parameters_node_keywords_append(params, param);
11710
11711 // If parsing the value of the parameter resulted in error recovery,
11712 // then we can put a missing node in its place and stop parsing the
11713 // parameters entirely now.
11714 if (parser->recovering) {
11715 looping = false;
11716 break;
11717 }
11718 }
11719 }
11720
11721 parser->in_keyword_arg = false;
11722 break;
11723 }
11724 case PM_TOKEN_USTAR:
11725 case PM_TOKEN_STAR: {
11726 update_parameter_state(parser, &parser->current, &order);
11727 parser_lex(parser);
11728
11729 pm_token_t operator = parser->previous;
11730 pm_token_t name;
11731
11732 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
11733 name = parser->previous;
11734 pm_parser_parameter_name_check(parser, &name);
11735 pm_parser_local_add_token(parser, &name);
11736 } else {
11737 name = not_provided(parser);
11738
11739 if (allows_forwarding_parameters) {
11740 pm_parser_local_add_token(parser, &operator);
11741 }
11742 }
11743
11744 pm_node_t *param = (pm_node_t *) pm_rest_parameter_node_create(parser, &operator, &name);
11745 if (params->rest == NULL) {
11746 pm_parameters_node_rest_set(params, param);
11747 } else {
11748 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_SPLAT_MULTI);
11749 pm_parameters_node_posts_append(params, param);
11750 }
11751
11752 break;
11753 }
11754 case PM_TOKEN_STAR_STAR:
11755 case PM_TOKEN_USTAR_STAR: {
11756 update_parameter_state(parser, &parser->current, &order);
11757 parser_lex(parser);
11758
11759 pm_token_t operator = parser->previous;
11760 pm_node_t *param;
11761
11762 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
11763 param = (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
11764 } else {
11765 pm_token_t name;
11766
11767 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
11768 name = parser->previous;
11769 pm_parser_parameter_name_check(parser, &name);
11770 pm_parser_local_add_token(parser, &name);
11771 } else {
11772 name = not_provided(parser);
11773
11774 if (allows_forwarding_parameters) {
11775 pm_parser_local_add_token(parser, &operator);
11776 }
11777 }
11778
11779 param = (pm_node_t *) pm_keyword_rest_parameter_node_create(parser, &operator, &name);
11780 }
11781
11782 if (params->keyword_rest == NULL) {
11783 pm_parameters_node_keyword_rest_set(params, param);
11784 } else {
11785 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI);
11786 pm_parameters_node_posts_append(params, param);
11787 }
11788
11789 break;
11790 }
11791 default:
11792 if (parser->previous.type == PM_TOKEN_COMMA) {
11793 if (allows_trailing_comma) {
11794 // If we get here, then we have a trailing comma in a
11795 // block parameter list.
11796 pm_node_t *param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
11797
11798 if (params->rest == NULL) {
11799 pm_parameters_node_rest_set(params, param);
11800 } else {
11801 pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_SPLAT_MULTI);
11802 pm_parameters_node_posts_append(params, (pm_node_t *) param);
11803 }
11804 } else {
11805 pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
11806 }
11807 }
11808
11809 looping = false;
11810 break;
11811 }
11812
11813 if (looping && uses_parentheses) {
11814 accept1(parser, PM_TOKEN_NEWLINE);
11815 }
11816 } while (looping && accept1(parser, PM_TOKEN_COMMA));
11817
11818 pm_do_loop_stack_pop(parser);
11819
11820 // If we don't have any parameters, return `NULL` instead of an empty `ParametersNode`.
11821 if (params->base.location.start == params->base.location.end) {
11822 pm_node_destroy(parser, (pm_node_t *) params);
11823 return NULL;
11824 }
11825
11826 return params;
11827}
11828
11833static inline void
11834parse_rescues(pm_parser_t *parser, pm_begin_node_t *parent_node, bool def_p) {
11835 pm_rescue_node_t *current = NULL;
11836
11837 while (accept1(parser, PM_TOKEN_KEYWORD_RESCUE)) {
11838 pm_rescue_node_t *rescue = pm_rescue_node_create(parser, &parser->previous);
11839
11840 switch (parser->current.type) {
11842 // Here we have an immediate => after the rescue keyword, in which case
11843 // we're going to have an empty list of exceptions to rescue (which
11844 // implies StandardError).
11845 parser_lex(parser);
11846 pm_rescue_node_operator_set(rescue, &parser->previous);
11847
11848 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
11849 reference = parse_target(parser, reference);
11850
11851 pm_rescue_node_reference_set(rescue, reference);
11852 break;
11853 }
11854 case PM_TOKEN_NEWLINE:
11855 case PM_TOKEN_SEMICOLON:
11857 // Here we have a terminator for the rescue keyword, in which case we're
11858 // going to just continue on.
11859 break;
11860 default: {
11861 if (token_begins_expression_p(parser->current.type) || match1(parser, PM_TOKEN_USTAR)) {
11862 // Here we have something that could be an exception expression, so
11863 // we'll attempt to parse it here and any others delimited by commas.
11864
11865 do {
11866 pm_node_t *expression = parse_starred_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_RESCUE_EXPRESSION);
11867 pm_rescue_node_exceptions_append(rescue, expression);
11868
11869 // If we hit a newline, then this is the end of the rescue expression. We
11870 // can continue on to parse the statements.
11871 if (match3(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_THEN)) break;
11872
11873 // If we hit a `=>` then we're going to parse the exception variable. Once
11874 // we've done that, we'll break out of the loop and parse the statements.
11875 if (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
11876 pm_rescue_node_operator_set(rescue, &parser->previous);
11877
11878 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_RESCUE_VARIABLE);
11879 reference = parse_target(parser, reference);
11880
11881 pm_rescue_node_reference_set(rescue, reference);
11882 break;
11883 }
11884 } while (accept1(parser, PM_TOKEN_COMMA));
11885 }
11886 }
11887 }
11888
11889 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
11890 accept1(parser, PM_TOKEN_KEYWORD_THEN);
11891 } else {
11892 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_RESCUE_TERM);
11893 }
11894
11896 pm_accepts_block_stack_push(parser, true);
11897 pm_statements_node_t *statements = parse_statements(parser, def_p ? PM_CONTEXT_RESCUE_DEF : PM_CONTEXT_RESCUE);
11898 if (statements) {
11899 pm_rescue_node_statements_set(rescue, statements);
11900 }
11901 pm_accepts_block_stack_pop(parser);
11902 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
11903 }
11904
11905 if (current == NULL) {
11906 pm_begin_node_rescue_clause_set(parent_node, rescue);
11907 } else {
11908 pm_rescue_node_consequent_set(current, rescue);
11909 }
11910
11911 current = rescue;
11912 }
11913
11914 // The end node locations on rescue nodes will not be set correctly
11915 // since we won't know the end until we've found all consequent
11916 // clauses. This sets the end location on all rescues once we know it
11917 if (current) {
11918 const uint8_t *end_to_set = current->base.location.end;
11919 current = parent_node->rescue_clause;
11920 while (current) {
11921 current->base.location.end = end_to_set;
11922 current = current->consequent;
11923 }
11924 }
11925
11926 if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) {
11927 pm_token_t else_keyword = parser->previous;
11928 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
11929
11930 pm_statements_node_t *else_statements = NULL;
11931 if (!match2(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_ENSURE)) {
11932 pm_accepts_block_stack_push(parser, true);
11933 else_statements = parse_statements(parser, def_p ? PM_CONTEXT_RESCUE_ELSE_DEF : PM_CONTEXT_RESCUE_ELSE);
11934 pm_accepts_block_stack_pop(parser);
11935 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
11936 }
11937
11938 pm_else_node_t *else_clause = pm_else_node_create(parser, &else_keyword, else_statements, &parser->current);
11939 pm_begin_node_else_clause_set(parent_node, else_clause);
11940 }
11941
11942 if (accept1(parser, PM_TOKEN_KEYWORD_ENSURE)) {
11943 pm_token_t ensure_keyword = parser->previous;
11944 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
11945
11946 pm_statements_node_t *ensure_statements = NULL;
11947 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
11948 pm_accepts_block_stack_push(parser, true);
11949 ensure_statements = parse_statements(parser, def_p ? PM_CONTEXT_ENSURE_DEF : PM_CONTEXT_ENSURE);
11950 pm_accepts_block_stack_pop(parser);
11951 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
11952 }
11953
11954 pm_ensure_node_t *ensure_clause = pm_ensure_node_create(parser, &ensure_keyword, ensure_statements, &parser->current);
11955 pm_begin_node_ensure_clause_set(parent_node, ensure_clause);
11956 }
11957
11958 if (parser->current.type == PM_TOKEN_KEYWORD_END) {
11959 pm_begin_node_end_keyword_set(parent_node, &parser->current);
11960 } else {
11961 pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
11962 pm_begin_node_end_keyword_set(parent_node, &end_keyword);
11963 }
11964}
11965
11966static inline pm_begin_node_t *
11967parse_rescues_as_begin(pm_parser_t *parser, pm_statements_node_t *statements, bool def_p) {
11968 pm_token_t no_begin_token = not_provided(parser);
11969 pm_begin_node_t *begin_node = pm_begin_node_create(parser, &no_begin_token, statements);
11970 parse_rescues(parser, begin_node, def_p);
11971
11972 // All nodes within a begin node are optional, so we look
11973 // for the earliest possible node that we can use to set
11974 // the BeginNode's start location
11975 const uint8_t *start = begin_node->base.location.start;
11976 if (begin_node->statements) {
11977 start = begin_node->statements->base.location.start;
11978 } else if (begin_node->rescue_clause) {
11979 start = begin_node->rescue_clause->base.location.start;
11980 } else if (begin_node->else_clause) {
11981 start = begin_node->else_clause->base.location.start;
11982 } else if (begin_node->ensure_clause) {
11983 start = begin_node->ensure_clause->base.location.start;
11984 }
11985
11986 begin_node->base.location.start = start;
11987 return begin_node;
11988}
11989
11994parse_block_parameters(
11995 pm_parser_t *parser,
11996 bool allows_trailing_comma,
11997 const pm_token_t *opening,
11998 bool is_lambda_literal
11999) {
12000 pm_parameters_node_t *parameters = NULL;
12001 if (!match1(parser, PM_TOKEN_SEMICOLON)) {
12002 parameters = parse_parameters(
12003 parser,
12004 is_lambda_literal ? PM_BINDING_POWER_DEFINED : PM_BINDING_POWER_INDEX,
12005 false,
12006 allows_trailing_comma,
12007 false
12008 );
12009 }
12010
12011 pm_block_parameters_node_t *block_parameters = pm_block_parameters_node_create(parser, parameters, opening);
12012 if ((opening->type != PM_TOKEN_NOT_PROVIDED) && accept1(parser, PM_TOKEN_SEMICOLON)) {
12013 do {
12014 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
12015 pm_parser_parameter_name_check(parser, &parser->previous);
12016 pm_parser_local_add_token(parser, &parser->previous);
12017
12018 pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
12019 pm_block_parameters_node_append_local(block_parameters, local);
12020 } while (accept1(parser, PM_TOKEN_COMMA));
12021 }
12022
12023 return block_parameters;
12024}
12025
12029static pm_block_node_t *
12030parse_block(pm_parser_t *parser) {
12031 pm_token_t opening = parser->previous;
12032 accept1(parser, PM_TOKEN_NEWLINE);
12033
12034 pm_accepts_block_stack_push(parser, true);
12035 pm_parser_scope_push(parser, false);
12036 pm_block_parameters_node_t *block_parameters = NULL;
12037
12038 if (accept1(parser, PM_TOKEN_PIPE)) {
12039 parser->current_scope->explicit_params = true;
12040 pm_token_t block_parameters_opening = parser->previous;
12041
12042 if (match1(parser, PM_TOKEN_PIPE)) {
12043 block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
12044 parser->command_start = true;
12045 parser_lex(parser);
12046 } else {
12047 block_parameters = parse_block_parameters(parser, true, &block_parameters_opening, false);
12048 accept1(parser, PM_TOKEN_NEWLINE);
12049 parser->command_start = true;
12050 expect1(parser, PM_TOKEN_PIPE, PM_ERR_BLOCK_PARAM_PIPE_TERM);
12051 }
12052
12053 pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
12054 }
12055
12056 uint32_t locals_body_index = 0;
12057
12058 if (block_parameters) {
12059 locals_body_index = (uint32_t) parser->current_scope->locals.size;
12060 }
12061
12062 accept1(parser, PM_TOKEN_NEWLINE);
12063 pm_node_t *statements = NULL;
12064
12065 if (opening.type == PM_TOKEN_BRACE_LEFT) {
12066 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
12067 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_BRACES);
12068 }
12069
12070 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE);
12071 } else {
12072 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
12074 pm_accepts_block_stack_push(parser, true);
12075 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_KEYWORDS);
12076 pm_accepts_block_stack_pop(parser);
12077 }
12078
12079 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
12080 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
12081 statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements, false);
12082 }
12083 }
12084
12085 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END);
12086 }
12087
12088 pm_node_t *parameters = (pm_node_t *) block_parameters;
12089 uint8_t maximum = parser->current_scope->numbered_parameters;
12090
12091 if (parameters == NULL && (maximum > 0)) {
12092 parameters = (pm_node_t *) pm_numbered_parameters_node_create(parser, &(pm_location_t) { .start = opening.start, .end = parser->previous.end }, maximum);
12093 locals_body_index = maximum;
12094 }
12095
12096 pm_constant_id_list_t locals = parser->current_scope->locals;
12097 pm_parser_scope_pop(parser);
12098 pm_accepts_block_stack_pop(parser);
12099 return pm_block_node_create(parser, &locals, locals_body_index, &opening, parameters, statements, &parser->previous);
12100}
12101
12107static bool
12108parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, bool accepts_command_call) {
12109 bool found = false;
12110
12111 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
12112 found |= true;
12113 arguments->opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
12114
12115 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
12116 arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
12117 } else {
12118 pm_accepts_block_stack_push(parser, true);
12119 parse_arguments(parser, arguments, true, PM_TOKEN_PARENTHESIS_RIGHT);
12120 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_ARGUMENT_TERM_PAREN);
12121 pm_accepts_block_stack_pop(parser);
12122
12123 arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
12124 }
12125 } else if (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
12126 found |= true;
12127 pm_accepts_block_stack_push(parser, false);
12128
12129 // If we get here, then the subsequent token cannot be used as an infix
12130 // operator. In this case we assume the subsequent token is part of an
12131 // argument to this method call.
12132 parse_arguments(parser, arguments, true, PM_TOKEN_EOF);
12133
12134 // If we have done with the arguments and still not consumed the comma,
12135 // then we have a trailing comma where we need to check whether it is
12136 // allowed or not.
12137 if (parser->previous.type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) {
12138 pm_parser_err_previous(parser, PM_ERR_EXPECT_ARGUMENT);
12139 }
12140
12141 pm_accepts_block_stack_pop(parser);
12142 }
12143
12144 // If we're at the end of the arguments, we can now check if there is a block
12145 // node that starts with a {. If there is, then we can parse it and add it to
12146 // the arguments.
12147 if (accepts_block) {
12148 pm_block_node_t *block = NULL;
12149
12150 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
12151 found |= true;
12152 block = parse_block(parser);
12153 pm_arguments_validate_block(parser, arguments, block);
12154 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
12155 found |= true;
12156 block = parse_block(parser);
12157 }
12158
12159 if (block != NULL) {
12160 if (arguments->block == NULL) {
12161 arguments->block = (pm_node_t *) block;
12162 } else {
12163 pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
12164 if (arguments->arguments == NULL) {
12165 arguments->arguments = pm_arguments_node_create(parser);
12166 }
12167 pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
12168 arguments->block = (pm_node_t *) block;
12169 }
12170 }
12171 }
12172
12173 return found;
12174}
12175
12176static inline pm_node_t *
12177parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context, pm_token_t *then_keyword) {
12178 context_push(parser, PM_CONTEXT_PREDICATE);
12179 pm_diagnostic_id_t error_id = context == PM_CONTEXT_IF ? PM_ERR_CONDITIONAL_IF_PREDICATE : PM_ERR_CONDITIONAL_UNLESS_PREDICATE;
12180 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, error_id);
12181
12182 // Predicates are closed by a term, a "then", or a term and then a "then".
12183 bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
12184
12185 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
12186 predicate_closed = true;
12187 *then_keyword = parser->previous;
12188 }
12189
12190 if (!predicate_closed) {
12191 pm_parser_err_current(parser, PM_ERR_CONDITIONAL_PREDICATE_TERM);
12192 }
12193
12194 context_pop(parser);
12195 return predicate;
12196}
12197
12198static inline pm_node_t *
12199parse_conditional(pm_parser_t *parser, pm_context_t context) {
12200 pm_token_t keyword = parser->previous;
12201 pm_token_t then_keyword = not_provided(parser);
12202
12203 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context, &then_keyword);
12204 pm_statements_node_t *statements = NULL;
12205
12207 pm_accepts_block_stack_push(parser, true);
12208 statements = parse_statements(parser, context);
12209 pm_accepts_block_stack_pop(parser);
12210 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
12211 }
12212
12213 pm_token_t end_keyword = not_provided(parser);
12214 pm_node_t *parent = NULL;
12215
12216 switch (context) {
12217 case PM_CONTEXT_IF:
12218 parent = (pm_node_t *) pm_if_node_create(parser, &keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
12219 break;
12220 case PM_CONTEXT_UNLESS:
12221 parent = (pm_node_t *) pm_unless_node_create(parser, &keyword, predicate, &then_keyword, statements);
12222 break;
12223 default:
12224 assert(false && "unreachable");
12225 break;
12226 }
12227
12228 pm_node_t *current = parent;
12229
12230 // Parse any number of elsif clauses. This will form a linked list of if
12231 // nodes pointing to each other from the top.
12232 if (context == PM_CONTEXT_IF) {
12233 while (accept1(parser, PM_TOKEN_KEYWORD_ELSIF)) {
12234 pm_token_t elsif_keyword = parser->previous;
12235 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, PM_CONTEXT_ELSIF, &then_keyword);
12236 pm_accepts_block_stack_push(parser, true);
12237 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_ELSIF);
12238 pm_accepts_block_stack_pop(parser);
12239
12240 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
12241
12242 pm_node_t *elsif = (pm_node_t *) pm_if_node_create(parser, &elsif_keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
12243 ((pm_if_node_t *) current)->consequent = elsif;
12244 current = elsif;
12245 }
12246 }
12247
12248 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
12249 parser_lex(parser);
12250 pm_token_t else_keyword = parser->previous;
12251
12252 pm_accepts_block_stack_push(parser, true);
12253 pm_statements_node_t *else_statements = parse_statements(parser, PM_CONTEXT_ELSE);
12254 pm_accepts_block_stack_pop(parser);
12255
12256 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
12257 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE);
12258
12259 pm_else_node_t *else_node = pm_else_node_create(parser, &else_keyword, else_statements, &parser->previous);
12260
12261 switch (context) {
12262 case PM_CONTEXT_IF:
12263 ((pm_if_node_t *) current)->consequent = (pm_node_t *) else_node;
12264 break;
12265 case PM_CONTEXT_UNLESS:
12266 ((pm_unless_node_t *) parent)->consequent = else_node;
12267 break;
12268 default:
12269 assert(false && "unreachable");
12270 break;
12271 }
12272 } else {
12273 // We should specialize this error message to refer to 'if' or 'unless' explicitly.
12274 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM);
12275 }
12276
12277 // Set the appropriate end location for all of the nodes in the subtree.
12278 switch (context) {
12279 case PM_CONTEXT_IF: {
12280 pm_node_t *current = parent;
12281 bool recursing = true;
12282
12283 while (recursing) {
12284 switch (PM_NODE_TYPE(current)) {
12285 case PM_IF_NODE:
12286 pm_if_node_end_keyword_loc_set((pm_if_node_t *) current, &parser->previous);
12287 current = ((pm_if_node_t *) current)->consequent;
12288 recursing = current != NULL;
12289 break;
12290 case PM_ELSE_NODE:
12291 pm_else_node_end_keyword_loc_set((pm_else_node_t *) current, &parser->previous);
12292 recursing = false;
12293 break;
12294 default: {
12295 recursing = false;
12296 break;
12297 }
12298 }
12299 }
12300 break;
12301 }
12302 case PM_CONTEXT_UNLESS:
12303 pm_unless_node_end_keyword_loc_set((pm_unless_node_t *) parent, &parser->previous);
12304 break;
12305 default:
12306 assert(false && "unreachable");
12307 break;
12308 }
12309
12310 return parent;
12311}
12312
/**
 * Case labels covering every keyword token. The macro omits the leading
 * `case` and the trailing colon so that it can be used as
 * `case PM_CASE_KEYWORD:` inside a switch over token types.
 */
#define PM_CASE_KEYWORD \
    PM_TOKEN_KEYWORD___ENCODING__: \
    case PM_TOKEN_KEYWORD___FILE__: \
    case PM_TOKEN_KEYWORD___LINE__: \
    case PM_TOKEN_KEYWORD_ALIAS: case PM_TOKEN_KEYWORD_AND: \
    case PM_TOKEN_KEYWORD_BEGIN: case PM_TOKEN_KEYWORD_BEGIN_UPCASE: case PM_TOKEN_KEYWORD_BREAK: \
    case PM_TOKEN_KEYWORD_CASE: case PM_TOKEN_KEYWORD_CLASS: \
    case PM_TOKEN_KEYWORD_DEF: case PM_TOKEN_KEYWORD_DEFINED: case PM_TOKEN_KEYWORD_DO: case PM_TOKEN_KEYWORD_DO_LOOP: \
    case PM_TOKEN_KEYWORD_ELSE: case PM_TOKEN_KEYWORD_ELSIF: case PM_TOKEN_KEYWORD_END: case PM_TOKEN_KEYWORD_END_UPCASE: case PM_TOKEN_KEYWORD_ENSURE: \
    case PM_TOKEN_KEYWORD_FALSE: case PM_TOKEN_KEYWORD_FOR: \
    case PM_TOKEN_KEYWORD_IF: case PM_TOKEN_KEYWORD_IN: \
    case PM_TOKEN_KEYWORD_MODULE: \
    case PM_TOKEN_KEYWORD_NEXT: case PM_TOKEN_KEYWORD_NIL: case PM_TOKEN_KEYWORD_NOT: \
    case PM_TOKEN_KEYWORD_OR: \
    case PM_TOKEN_KEYWORD_REDO: case PM_TOKEN_KEYWORD_RESCUE: case PM_TOKEN_KEYWORD_RETRY: case PM_TOKEN_KEYWORD_RETURN: \
    case PM_TOKEN_KEYWORD_SELF: case PM_TOKEN_KEYWORD_SUPER: \
    case PM_TOKEN_KEYWORD_THEN: case PM_TOKEN_KEYWORD_TRUE: \
    case PM_TOKEN_KEYWORD_UNDEF: case PM_TOKEN_KEYWORD_UNLESS: case PM_TOKEN_KEYWORD_UNTIL: \
    case PM_TOKEN_KEYWORD_WHEN: case PM_TOKEN_KEYWORD_WHILE: \
    case PM_TOKEN_KEYWORD_YIELD
12328
/**
 * Case labels covering the operator tokens accepted wherever a method name
 * may be spelled as an operator (symbols, `alias`, `undef` and method
 * definitions in this file). Written without the leading `case` and the
 * trailing colon, for use as `case PM_CASE_OPERATOR:`.
 */
#define PM_CASE_OPERATOR \
    PM_TOKEN_AMPERSAND: case PM_TOKEN_BACKTICK: \
    case PM_TOKEN_BANG_EQUAL: case PM_TOKEN_BANG_TILDE: case PM_TOKEN_BANG: \
    case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL: case PM_TOKEN_BRACKET_LEFT_RIGHT: \
    case PM_TOKEN_CARET: \
    case PM_TOKEN_EQUAL_EQUAL_EQUAL: case PM_TOKEN_EQUAL_EQUAL: case PM_TOKEN_EQUAL_TILDE: \
    case PM_TOKEN_GREATER_EQUAL: case PM_TOKEN_GREATER_GREATER: case PM_TOKEN_GREATER: \
    case PM_TOKEN_LESS_EQUAL_GREATER: case PM_TOKEN_LESS_EQUAL: case PM_TOKEN_LESS_LESS: case PM_TOKEN_LESS: \
    case PM_TOKEN_MINUS: case PM_TOKEN_PERCENT: case PM_TOKEN_PIPE: case PM_TOKEN_PLUS: case PM_TOKEN_SLASH: \
    case PM_TOKEN_STAR_STAR: case PM_TOKEN_STAR: case PM_TOKEN_TILDE: \
    case PM_TOKEN_UAMPERSAND: case PM_TOKEN_UMINUS: case PM_TOKEN_UMINUS_NUM: \
    case PM_TOKEN_UPLUS: case PM_TOKEN_USTAR: case PM_TOKEN_USTAR_STAR
12341
/**
 * Case labels covering numeric literals, the openers of symbol, regexp,
 * string, xstring, %-list and heredoc literals, the literal-like keywords,
 * `->`, and unary minus on a number. Written without the leading `case` and
 * the trailing colon, for use as `case PM_CASE_PRIMITIVE:`.
 *
 * NOTE(review): presumably these are the tokens that can begin a primitive
 * value; confirm against the call sites, which are outside this section.
 */
#define PM_CASE_PRIMITIVE \
    PM_TOKEN_INTEGER: case PM_TOKEN_INTEGER_IMAGINARY: \
    case PM_TOKEN_INTEGER_RATIONAL: case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: \
    case PM_TOKEN_FLOAT: case PM_TOKEN_FLOAT_IMAGINARY: \
    case PM_TOKEN_FLOAT_RATIONAL: case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY: \
    case PM_TOKEN_SYMBOL_BEGIN: case PM_TOKEN_REGEXP_BEGIN: case PM_TOKEN_BACKTICK: \
    case PM_TOKEN_PERCENT_LOWER_X: case PM_TOKEN_PERCENT_LOWER_I: case PM_TOKEN_PERCENT_LOWER_W: \
    case PM_TOKEN_PERCENT_UPPER_I: case PM_TOKEN_PERCENT_UPPER_W: \
    case PM_TOKEN_STRING_BEGIN: \
    case PM_TOKEN_KEYWORD_NIL: case PM_TOKEN_KEYWORD_SELF: \
    case PM_TOKEN_KEYWORD_TRUE: case PM_TOKEN_KEYWORD_FALSE: \
    case PM_TOKEN_KEYWORD___FILE__: case PM_TOKEN_KEYWORD___LINE__: case PM_TOKEN_KEYWORD___ENCODING__: \
    case PM_TOKEN_MINUS_GREATER: case PM_TOKEN_HEREDOC_START: \
    case PM_TOKEN_UMINUS_NUM: case PM_TOKEN_CHARACTER_LITERAL
12356
/**
 * Case labels covering ampersand, splat and `...` markers, identifiers,
 * labels, constants and instance/global/class variables. Written without the
 * leading `case` and the trailing colon, for use as
 * `case PM_CASE_PARAMETER:`.
 *
 * NOTE(review): presumably these are the tokens that can begin a parameter;
 * confirm against the call sites, which are outside this section.
 */
#define PM_CASE_PARAMETER \
    PM_TOKEN_UAMPERSAND: case PM_TOKEN_AMPERSAND: case PM_TOKEN_UDOT_DOT_DOT: \
    case PM_TOKEN_IDENTIFIER: case PM_TOKEN_LABEL: \
    case PM_TOKEN_USTAR: case PM_TOKEN_STAR: \
    case PM_TOKEN_STAR_STAR: case PM_TOKEN_USTAR_STAR: \
    case PM_TOKEN_CONSTANT: \
    case PM_TOKEN_INSTANCE_VARIABLE: case PM_TOKEN_GLOBAL_VARIABLE: case PM_TOKEN_CLASS_VARIABLE
12365
/**
 * Case labels covering a set of node types (not token types): variable,
 * constant and reference read nodes plus multi-target nodes. Written without
 * the leading `case` and the trailing colon, for use as
 * `case PM_CASE_WRITABLE:`.
 *
 * NOTE(review): presumably these are the nodes that can be turned into a
 * write target; confirm against the call sites, which are outside this
 * section.
 */
#define PM_CASE_WRITABLE \
    PM_CLASS_VARIABLE_READ_NODE: \
    case PM_CONSTANT_PATH_NODE: \
    case PM_CONSTANT_READ_NODE: \
    case PM_GLOBAL_VARIABLE_READ_NODE: \
    case PM_LOCAL_VARIABLE_READ_NODE: \
    case PM_INSTANCE_VARIABLE_READ_NODE: \
    case PM_MULTI_TARGET_NODE: \
    case PM_BACK_REFERENCE_READ_NODE: \
    case PM_NUMBERED_REFERENCE_READ_NODE
12374
12375// Assert here that the flags are the same so that we can safely switch the type
12376// of the node without having to move the flags.
12377PM_STATIC_ASSERT(__LINE__, ((int) PM_STRING_FLAGS_FORCED_UTF8_ENCODING) == ((int) PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING), "Expected the flags to match.");
12378
12383static inline pm_node_flags_t
12384parse_unescaped_encoding(const pm_parser_t *parser) {
12385 if (parser->explicit_encoding != NULL) {
12388 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
12390 }
12391 }
12392 return 0;
12393}
12394
12399static pm_node_t *
12400parse_string_part(pm_parser_t *parser) {
12401 switch (parser->current.type) {
12402 // Here the lexer has returned to us plain string content. In this case
12403 // we'll create a string node that has no opening or closing and return that
12404 // as the part. These kinds of parts look like:
12405 //
12406 // "aaa #{bbb} #@ccc ddd"
12407 // ^^^^ ^ ^^^^
12409 pm_token_t opening = not_provided(parser);
12410 pm_token_t closing = not_provided(parser);
12411
12412 pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
12413 pm_node_flag_set(node, parse_unescaped_encoding(parser));
12414
12415 parser_lex(parser);
12416 return node;
12417 }
12418 // Here the lexer has returned the beginning of an embedded expression. In
12419 // that case we'll parse the inner statements and return that as the part.
12420 // These kinds of parts look like:
12421 //
12422 // "aaa #{bbb} #@ccc ddd"
12423 // ^^^^^^
12425 pm_lex_state_t state = parser->lex_state;
12426 int brace_nesting = parser->brace_nesting;
12427
12428 parser->brace_nesting = 0;
12429 lex_state_set(parser, PM_LEX_STATE_BEG);
12430 parser_lex(parser);
12431
12432 pm_token_t opening = parser->previous;
12433 pm_statements_node_t *statements = NULL;
12434
12435 if (!match1(parser, PM_TOKEN_EMBEXPR_END)) {
12436 pm_accepts_block_stack_push(parser, true);
12437 statements = parse_statements(parser, PM_CONTEXT_EMBEXPR);
12438 pm_accepts_block_stack_pop(parser);
12439 }
12440
12441 parser->brace_nesting = brace_nesting;
12442 lex_state_set(parser, state);
12443
12444 expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
12445 pm_token_t closing = parser->previous;
12446
12447 return (pm_node_t *) pm_embedded_statements_node_create(parser, &opening, statements, &closing);
12448 }
12449
12450 // Here the lexer has returned the beginning of an embedded variable.
12451 // In that case we'll parse the variable and create an appropriate node
12452 // for it and then return that node. These kinds of parts look like:
12453 //
12454 // "aaa #{bbb} #@ccc ddd"
12455 // ^^^^^
12456 case PM_TOKEN_EMBVAR: {
12457 lex_state_set(parser, PM_LEX_STATE_BEG);
12458 parser_lex(parser);
12459
12460 pm_token_t operator = parser->previous;
12461 pm_node_t *variable;
12462
12463 switch (parser->current.type) {
12464 // In this case a back reference is being interpolated. We'll
12465 // create a global variable read node.
12467 parser_lex(parser);
12468 variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
12469 break;
12470 // In this case an nth reference is being interpolated. We'll
12471 // create a global variable read node.
12473 parser_lex(parser);
12474 variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
12475 break;
12476 // In this case a global variable is being interpolated. We'll
12477 // create a global variable read node.
12479 parser_lex(parser);
12480 variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
12481 break;
12482 // In this case an instance variable is being interpolated.
12483 // We'll create an instance variable read node.
12485 parser_lex(parser);
12486 variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
12487 break;
12488 // In this case a class variable is being interpolated. We'll
12489 // create a class variable read node.
12491 parser_lex(parser);
12492 variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
12493 break;
12494 // We can hit here if we got an invalid token. In that case
12495 // we'll not attempt to lex this token and instead just return a
12496 // missing node.
12497 default:
12498 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EMBVAR_INVALID);
12499 variable = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
12500 break;
12501 }
12502
12503 return (pm_node_t *) pm_embedded_variable_node_create(parser, &operator, variable);
12504 }
12505 default:
12506 parser_lex(parser);
12507 pm_parser_err_previous(parser, PM_ERR_CANNOT_PARSE_STRING_PART);
12508 return NULL;
12509 }
12510}
12511
12512static pm_node_t *
12513parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_state) {
12514 pm_token_t opening = parser->previous;
12515
12516 if (lex_mode->mode != PM_LEX_STRING) {
12517 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
12518
12519 switch (parser->current.type) {
12521 case PM_TOKEN_CONSTANT:
12528 case PM_CASE_KEYWORD:
12529 parser_lex(parser);
12530 break;
12531 case PM_CASE_OPERATOR:
12532 lex_state_set(parser, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
12533 parser_lex(parser);
12534 break;
12535 default:
12536 expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID);
12537 break;
12538 }
12539
12540 pm_token_t closing = not_provided(parser);
12541 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
12542
12543 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
12544 return (pm_node_t *) symbol;
12545 }
12546
12547 if (lex_mode->as.string.interpolation) {
12548 // If we have the end of the symbol, then we can return an empty symbol.
12549 if (match1(parser, PM_TOKEN_STRING_END)) {
12550 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
12551 parser_lex(parser);
12552
12553 pm_token_t content = not_provided(parser);
12554 pm_token_t closing = parser->previous;
12555 return (pm_node_t *) pm_symbol_node_create(parser, &opening, &content, &closing);
12556 }
12557
12558 // Now we can parse the first part of the symbol.
12559 pm_node_t *part = parse_string_part(parser);
12560
12561 // If we got a string part, then it's possible that we could transform
12562 // what looks like an interpolated symbol into a regular symbol.
12563 if (part && PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
12564 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
12565 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
12566
12567 return (pm_node_t *) pm_string_node_to_symbol_node(parser, (pm_string_node_t *) part, &opening, &parser->previous);
12568 }
12569
12570 // Create a node_list first. We'll use this to check if it should be an
12571 // InterpolatedSymbolNode or a SymbolNode.
12572 pm_node_list_t node_list = { 0 };
12573 if (part) pm_node_list_append(&node_list, part);
12574
12575 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
12576 if ((part = parse_string_part(parser)) != NULL) {
12577 pm_node_list_append(&node_list, part);
12578 }
12579 }
12580
12581 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
12582 if (match1(parser, PM_TOKEN_EOF)) {
12583 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_INTERPOLATED);
12584 } else {
12585 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
12586 }
12587
12588 return (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &node_list, &parser->previous);
12589 }
12590
12591 pm_token_t content;
12592 pm_string_t unescaped;
12593
12594 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
12595 content = parser->current;
12596 unescaped = parser->current_string;
12597 parser_lex(parser);
12598
12599 // If we have two string contents in a row, then the content of this
12600 // symbol is split because of heredoc contents. This looks like:
12601 //
12602 // <<A; :'a
12603 // A
12604 // b'
12605 //
12606 // In this case, the best way we have to represent this is as an
12607 // interpolated string node, so that's what we'll do here.
12608 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
12609 pm_node_list_t parts = { 0 };
12610 pm_token_t bounds = not_provided(parser);
12611
12612 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &unescaped);
12613 pm_node_list_append(&parts, part);
12614
12615 part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &parser->current, &bounds, &parser->current_string);
12616 pm_node_list_append(&parts, part);
12617
12618 if (next_state != PM_LEX_STATE_NONE) {
12619 lex_state_set(parser, next_state);
12620 }
12621
12622 parser_lex(parser);
12623 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
12624 return (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
12625 }
12626 } else {
12627 content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end };
12628 pm_string_shared_init(&unescaped, content.start, content.end);
12629 }
12630
12631 if (next_state != PM_LEX_STATE_NONE) {
12632 lex_state_set(parser, next_state);
12633 }
12634
12635 if (match1(parser, PM_TOKEN_EOF)) {
12636 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_DYNAMIC);
12637 } else {
12638 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
12639 }
12640 return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
12641}
12642
12647static inline pm_node_t *
12648parse_undef_argument(pm_parser_t *parser) {
12649 switch (parser->current.type) {
12650 case PM_CASE_KEYWORD:
12651 case PM_CASE_OPERATOR:
12652 case PM_TOKEN_CONSTANT:
12654 case PM_TOKEN_METHOD_NAME: {
12655 parser_lex(parser);
12656
12657 pm_token_t opening = not_provided(parser);
12658 pm_token_t closing = not_provided(parser);
12659 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
12660
12661 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
12662 return (pm_node_t *) symbol;
12663 }
12664 case PM_TOKEN_SYMBOL_BEGIN: {
12665 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
12666 parser_lex(parser);
12667
12668 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_NONE);
12669 }
12670 default:
12671 pm_parser_err_current(parser, PM_ERR_UNDEF_ARGUMENT);
12672 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
12673 }
12674}
12675
12682static inline pm_node_t *
12683parse_alias_argument(pm_parser_t *parser, bool first) {
12684 switch (parser->current.type) {
12685 case PM_CASE_OPERATOR:
12686 case PM_CASE_KEYWORD:
12687 case PM_TOKEN_CONSTANT:
12689 case PM_TOKEN_METHOD_NAME: {
12690 if (first) {
12691 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
12692 }
12693
12694 parser_lex(parser);
12695 pm_token_t opening = not_provided(parser);
12696 pm_token_t closing = not_provided(parser);
12697 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
12698
12699 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
12700 return (pm_node_t *) symbol;
12701 }
12702 case PM_TOKEN_SYMBOL_BEGIN: {
12703 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
12704 parser_lex(parser);
12705
12706 return parse_symbol(parser, &lex_mode, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
12707 }
12709 parser_lex(parser);
12710 return (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
12712 parser_lex(parser);
12713 return (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
12715 parser_lex(parser);
12716 return (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
12717 default:
12718 pm_parser_err_current(parser, PM_ERR_ALIAS_ARGUMENT);
12719 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
12720 }
12721}
12722
12727static bool
12728outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
12729 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
12730 if (scope->numbered_parameters) return true;
12731 }
12732
12733 return false;
12734}
12735
12739static pm_node_t *
12740parse_variable_call(pm_parser_t *parser) {
12741 pm_node_flags_t flags = 0;
12742
12743 if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
12744 int depth;
12745 if ((depth = pm_parser_local_depth(parser, &parser->previous)) != -1) {
12746 return (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, (uint32_t) depth);
12747 }
12748
12749 if (!parser->current_scope->closed && pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
12750 // Now that we know we have a numbered parameter, we need to check
12751 // if it's allowed in this context. If it is, then we will create a
12752 // local variable read. If it's not, then we'll create a normal call
12753 // node but add an error.
12754 if (parser->current_scope->explicit_params) {
12755 pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED);
12756 } else if (outer_scope_using_numbered_parameters_p(parser)) {
12757 pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
12758 } else {
12759 // Indicate that this scope is using numbered params so that child
12760 // scopes cannot.
12761 uint8_t number = parser->previous.start[1];
12762
12763 // We subtract the value for the character '0' to get the actual
12764 // integer value of the number (only _1 through _9 are valid)
12765 uint8_t numbered_parameters = (uint8_t) (number - '0');
12766 if (numbered_parameters > parser->current_scope->numbered_parameters) {
12767 parser->current_scope->numbered_parameters = numbered_parameters;
12768 pm_parser_numbered_parameters_set(parser, numbered_parameters);
12769 }
12770
12771 // When you use a numbered parameter, it implies the existence
12772 // of all of the locals that exist before it. For example,
12773 // referencing _2 means that _1 must exist. Therefore here we
12774 // loop through all of the possibilities and add them into the
12775 // constant pool.
12776 uint8_t current = '1';
12777 uint8_t *value;
12778
12779 while (current < number) {
12780 value = malloc(2);
12781 value[0] = '_';
12782 value[1] = current++;
12783 pm_parser_local_add_owned(parser, value, 2);
12784 }
12785
12786 // Now we can add the actual token that is being used. For
12787 // this one we can add a shared version since it is directly
12788 // referenced in the source.
12789 pm_parser_local_add_token(parser, &parser->previous);
12790 return (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, 0);
12791 }
12792 }
12793
12795 }
12796
12797 pm_call_node_t *node = pm_call_node_variable_call_create(parser, &parser->previous);
12798 pm_node_flag_set((pm_node_t *)node, flags);
12799
12800 return (pm_node_t *) node;
12801}
12802
12808static inline pm_token_t
12809parse_method_definition_name(pm_parser_t *parser) {
12810 switch (parser->current.type) {
12811 case PM_CASE_KEYWORD:
12812 case PM_TOKEN_CONSTANT:
12814 parser_lex(parser);
12815 return parser->previous;
12817 pm_refute_numbered_parameter(parser, parser->current.start, parser->current.end);
12818 parser_lex(parser);
12819 return parser->previous;
12820 case PM_CASE_OPERATOR:
12821 lex_state_set(parser, PM_LEX_STATE_ENDFN);
12822 parser_lex(parser);
12823 return parser->previous;
12824 default:
12825 return (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->current.start, .end = parser->current.end };
12826 }
12827}
12828
12829static void
12830parse_heredoc_dedent_string(pm_string_t *string, size_t common_whitespace) {
12831 // Get a reference to the string struct that is being held by the string
12832 // node. This is the value we're going to actually manipulate.
12833 pm_string_ensure_owned(string);
12834
12835 // Now get the bounds of the existing string. We'll use this as a
12836 // destination to move bytes into. We'll also use it for bounds checking
12837 // since we don't require that these strings be null terminated.
12838 size_t dest_length = pm_string_length(string);
12839 const uint8_t *source_cursor = (uint8_t *) string->source;
12840 const uint8_t *source_end = source_cursor + dest_length;
12841
12842 // We're going to move bytes backward in the string when we get leading
12843 // whitespace, so we'll maintain a pointer to the current position in the
12844 // string that we're writing to.
12845 size_t trimmed_whitespace = 0;
12846
12847 // While we haven't reached the amount of common whitespace that we need to
12848 // trim and we haven't reached the end of the string, we'll keep trimming
12849 // whitespace. Trimming in this context means skipping over these bytes such
12850 // that they aren't copied into the new string.
12851 while ((source_cursor < source_end) && pm_char_is_inline_whitespace(*source_cursor) && trimmed_whitespace < common_whitespace) {
12852 if (*source_cursor == '\t') {
12853 trimmed_whitespace = (trimmed_whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
12854 if (trimmed_whitespace > common_whitespace) break;
12855 } else {
12856 trimmed_whitespace++;
12857 }
12858
12859 source_cursor++;
12860 dest_length--;
12861 }
12862
12863 memmove((uint8_t *) string->source, source_cursor, (size_t) (source_end - source_cursor));
12864 string->length = dest_length;
12865}
12866
12870static void
12871parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_whitespace) {
12872 // The next node should be dedented if it's the first node in the list or if
12873 // if follows a string node.
12874 bool dedent_next = true;
12875
12876 // Iterate over all nodes, and trim whitespace accordingly. We're going to
12877 // keep around two indices: a read and a write. If we end up trimming all of
12878 // the whitespace from a node, then we'll drop it from the list entirely.
12879 size_t write_index = 0;
12880
12881 for (size_t read_index = 0; read_index < nodes->size; read_index++) {
12882 pm_node_t *node = nodes->nodes[read_index];
12883
12884 // We're not manipulating child nodes that aren't strings. In this case
12885 // we'll skip past it and indicate that the subsequent node should not
12886 // be dedented.
12887 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE)) {
12888 nodes->nodes[write_index++] = node;
12889 dedent_next = false;
12890 continue;
12891 }
12892
12893 pm_string_node_t *string_node = ((pm_string_node_t *) node);
12894 if (dedent_next) {
12895 parse_heredoc_dedent_string(&string_node->unescaped, common_whitespace);
12896 }
12897
12898 if (string_node->unescaped.length == 0) {
12899 pm_node_destroy(parser, node);
12900 } else {
12901 nodes->nodes[write_index++] = node;
12902 }
12903
12904 // We always dedent the next node if it follows a string node.
12905 dedent_next = true;
12906 }
12907
12908 nodes->size = write_index;
12909}
12910
// Forward declaration of parse_pattern. The pattern helpers defined between
// here and its definition call it recursively for nested patterns.
12911static pm_node_t *
12912parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id);
12913
12917static pm_node_t *
12918parse_pattern_constant_path(pm_parser_t *parser, pm_node_t *node) {
12919 // Now, if there are any :: operators that follow, parse them as constant
12920 // path nodes.
12921 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
12922 pm_token_t delimiter = parser->previous;
12923 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
12924
12925 pm_node_t *child = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
12926 node = (pm_node_t *)pm_constant_path_node_create(parser, node, &delimiter, child);
12927 }
12928
12929 // If there is a [ or ( that follows, then this is part of a larger pattern
12930 // expression. We'll parse the inner pattern here, then modify the returned
12931 // inner pattern with our constant path attached.
12932 if (!match2(parser, PM_TOKEN_BRACKET_LEFT, PM_TOKEN_PARENTHESIS_LEFT)) {
12933 return node;
12934 }
12935
12936 pm_token_t opening;
12937 pm_token_t closing;
12938 pm_node_t *inner = NULL;
12939
12940 if (accept1(parser, PM_TOKEN_BRACKET_LEFT)) {
12941 opening = parser->previous;
12942 accept1(parser, PM_TOKEN_NEWLINE);
12943
12944 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
12945 inner = parse_pattern(parser, true, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
12946 accept1(parser, PM_TOKEN_NEWLINE);
12947 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
12948 }
12949
12950 closing = parser->previous;
12951 } else {
12952 parser_lex(parser);
12953 opening = parser->previous;
12954
12955 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
12956 inner = parse_pattern(parser, true, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
12957 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
12958 }
12959
12960 closing = parser->previous;
12961 }
12962
12963 if (!inner) {
12964 // If there was no inner pattern, then we have something like Foo() or
12965 // Foo[]. In that case we'll create an array pattern with no requireds.
12966 return (pm_node_t *) pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
12967 }
12968
12969 // Now that we have the inner pattern, check to see if it's an array, find,
12970 // or hash pattern. If it is, then we'll attach our constant path to it if
12971 // it doesn't already have a constant. If it's not one of those node types
12972 // or it does have a constant, then we'll create an array pattern.
12973 switch (PM_NODE_TYPE(inner)) {
12974 case PM_ARRAY_PATTERN_NODE: {
12975 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
12976
12977 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
12978 pattern_node->base.location.start = node->location.start;
12979 pattern_node->base.location.end = closing.end;
12980
12981 pattern_node->constant = node;
12982 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
12983 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
12984
12985 return (pm_node_t *) pattern_node;
12986 }
12987
12988 break;
12989 }
12990 case PM_FIND_PATTERN_NODE: {
12991 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
12992
12993 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
12994 pattern_node->base.location.start = node->location.start;
12995 pattern_node->base.location.end = closing.end;
12996
12997 pattern_node->constant = node;
12998 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
12999 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
13000
13001 return (pm_node_t *) pattern_node;
13002 }
13003
13004 break;
13005 }
13006 case PM_HASH_PATTERN_NODE: {
13007 pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner;
13008
13009 if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
13010 pattern_node->base.location.start = node->location.start;
13011 pattern_node->base.location.end = closing.end;
13012
13013 pattern_node->constant = node;
13014 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
13015 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
13016
13017 return (pm_node_t *) pattern_node;
13018 }
13019
13020 break;
13021 }
13022 default:
13023 break;
13024 }
13025
13026 // If we got here, then we didn't return one of the inner patterns by
13027 // attaching its constant. In this case we'll create an array pattern and
13028 // attach our constant to it.
13029 pm_array_pattern_node_t *pattern_node = pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
13030 pm_array_pattern_node_requireds_append(pattern_node, inner);
13031 return (pm_node_t *) pattern_node;
13032}
13033
13037static pm_splat_node_t *
13038parse_pattern_rest(pm_parser_t *parser) {
13039 assert(parser->previous.type == PM_TOKEN_USTAR);
13040 pm_token_t operator = parser->previous;
13041 pm_node_t *name = NULL;
13042
13043 // Rest patterns don't necessarily have a name associated with them. So we
13044 // will check for that here. If they do, then we'll add it to the local table
13045 // since this pattern will cause it to become a local variable.
13046 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
13047 pm_token_t identifier = parser->previous;
13048 pm_parser_local_add_token(parser, &identifier);
13049 name = (pm_node_t *) pm_local_variable_target_node_create(parser, &identifier);
13050 }
13051
13052 // Finally we can return the created node.
13053 return pm_splat_node_create(parser, &operator, name);
13054}
13055
13059static pm_node_t *
13060parse_pattern_keyword_rest(pm_parser_t *parser) {
13061 assert(parser->current.type == PM_TOKEN_USTAR_STAR);
13062 parser_lex(parser);
13063
13064 pm_token_t operator = parser->previous;
13065 pm_node_t *value = NULL;
13066
13067 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
13068 return (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
13069 }
13070
13071 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
13072 pm_parser_local_add_token(parser, &parser->previous);
13073 value = (pm_node_t *) pm_local_variable_target_node_create(parser, &parser->previous);
13074 }
13075
13076 return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
13077}
13078
// Parses the remainder of a hash pattern. `first_assoc` is the first element,
// already created by the caller: either an assoc node whose value has not been
// parsed yet, or the node returned for a leading `**` rest. The collected
// elements and the optional rest are handed to
// pm_hash_pattern_node_node_list_create() and that node is returned.
//
// NOTE(review): this copy of the function is missing several lines (the
// embedded source numbering skips 13082, 13089, 13109-13110, 13121 and 13139).
// The return-type line, the conditions that open the `} else {` / `break;`
// bodies, and the case labels in front of `rest = first_assoc;` are not
// present, so the block does not parse as C as written. Restore them from the
// upstream source; the token lists of the missing conditions cannot be
// inferred from what is visible here.
13083parse_pattern_hash(pm_parser_t *parser, pm_node_t *first_assoc) {
13084 pm_node_list_t assocs = { 0 };
13085 pm_node_t *rest = NULL;
13086
13087 switch (PM_NODE_TYPE(first_assoc)) {
13088 case PM_ASSOC_NODE: {
// NOTE(review): an `if (...) {` line appears to be missing here (13089). The
// first branch parses an explicit value for the key; the `else` branch handles
// a key with no value.
13090 // Here we have a value for the first assoc in the list, so we will
13091 // parse it now and update the first assoc.
13092 pm_node_t *value = parse_pattern(parser, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
13093
13094 pm_assoc_node_t *assoc = (pm_assoc_node_t *) first_assoc;
13095 assoc->base.location.end = value->location.end;
13096 assoc->value = value;
13097 } else {
// No value was given for the key. When the key is a symbol node, the
// location of its value is added to the local table.
13098 pm_node_t *key = ((pm_assoc_node_t *) first_assoc)->key;
13099
13100 if (PM_NODE_TYPE_P(key, PM_SYMBOL_NODE)) {
13101 const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
13102 pm_parser_local_add_location(parser, value_loc->start, value_loc->end);
13103 }
13104 }
13105
13106 pm_node_list_append(&assocs, first_assoc);
13107 break;
13108 }
// NOTE(review): the case label(s) for the rest-style first element appear to
// be missing here (13109-13110).
13111 rest = first_assoc;
13112 break;
13113 default:
13114 assert(false);
13115 break;
13116 }
13117
13118 // If there are any other assocs, then we'll parse them now.
13119 while (accept1(parser, PM_TOKEN_COMMA)) {
13120 // Here we need to break to support trailing commas.
// NOTE(review): the `if (...) {` guarding this break appears to be missing
// (13121); as written the loop would always exit after the first comma.
13122 break;
13123 }
13124
13125 if (match1(parser, PM_TOKEN_USTAR_STAR)) {
13126 pm_node_t *assoc = parse_pattern_keyword_rest(parser);
13127
// The first `**` element becomes the rest. Any later one is reported as an
// error and kept in the element list.
13128 if (rest == NULL) {
13129 rest = assoc;
13130 } else {
13131 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
13132 pm_node_list_append(&assocs, assoc);
13133 }
13134 } else {
13135 expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
13136 pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
13137 pm_node_t *value = NULL;
13138
// NOTE(review): another `if (...) {` line appears to be missing here (13139),
// mirroring the value / no-value split used for the first element above.
13140 value = parse_pattern(parser, false, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY);
13141 } else {
13142 const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
13143 pm_parser_local_add_location(parser, value_loc->start, value_loc->end);
13144 }
13145
13146 pm_token_t operator = not_provided(parser);
13147 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
13148
// A keyed element that comes after the rest is reported as an error but is
// still appended.
13149 if (rest != NULL) {
13150 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
13151 }
13152
13153 pm_node_list_append(&assocs, assoc);
13154 }
13155 }
13156
// The temporary list's backing array is freed once the hash pattern node has
// been created from it.
13157 pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs, rest);
13158 free(assocs.nodes);
13159
13160 return node;
13161}
13162
13166static pm_node_t *
13167parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
13168 switch (parser->current.type) {
13170 case PM_TOKEN_METHOD_NAME: {
13171 parser_lex(parser);
13172 pm_token_t name = parser->previous;
13173 int depth = pm_parser_local_depth(parser, &name);
13174 if (depth < 0) {
13175 depth = 0;
13176 pm_parser_local_add_token(parser, &name);
13177 }
13178 return (pm_node_t *) pm_local_variable_target_node_create_depth(parser, &name, (uint32_t) depth);
13179 }
13181 pm_token_t opening = parser->current;
13182 parser_lex(parser);
13183
13184 if (accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
13185 // If we have an empty array pattern, then we'll just return a new
13186 // array pattern node.
13187 return (pm_node_t *)pm_array_pattern_node_empty_create(parser, &opening, &parser->previous);
13188 }
13189
13190 // Otherwise, we'll parse the inner pattern, then deal with it depending
13191 // on the type it returns.
13192 pm_node_t *inner = parse_pattern(parser, true, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET);
13193
13194 accept1(parser, PM_TOKEN_NEWLINE);
13195
13196 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
13197 pm_token_t closing = parser->previous;
13198
13199 switch (PM_NODE_TYPE(inner)) {
13200 case PM_ARRAY_PATTERN_NODE: {
13201 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
13202 if (pattern_node->opening_loc.start == NULL) {
13203 pattern_node->base.location.start = opening.start;
13204 pattern_node->base.location.end = closing.end;
13205
13206 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
13207 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
13208
13209 return (pm_node_t *) pattern_node;
13210 }
13211
13212 break;
13213 }
13214 case PM_FIND_PATTERN_NODE: {
13215 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
13216 if (pattern_node->opening_loc.start == NULL) {
13217 pattern_node->base.location.start = opening.start;
13218 pattern_node->base.location.end = closing.end;
13219
13220 pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
13221 pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
13222
13223 return (pm_node_t *) pattern_node;
13224 }
13225
13226 break;
13227 }
13228 default:
13229 break;
13230 }
13231
13232 pm_array_pattern_node_t *node = pm_array_pattern_node_empty_create(parser, &opening, &closing);
13233 pm_array_pattern_node_requireds_append(node, inner);
13234 return (pm_node_t *) node;
13235 }
13236 case PM_TOKEN_BRACE_LEFT: {
13237 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
13238 parser->pattern_matching_newlines = false;
13239
13241 pm_token_t opening = parser->current;
13242 parser_lex(parser);
13243
13244 if (accept1(parser, PM_TOKEN_BRACE_RIGHT)) {
13245 // If we have an empty hash pattern, then we'll just return a new hash
13246 // pattern node.
13247 node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
13248 } else {
13249 pm_node_t *first_assoc;
13250
13251 switch (parser->current.type) {
13252 case PM_TOKEN_LABEL: {
13253 parser_lex(parser);
13254
13255 pm_symbol_node_t *key = pm_symbol_node_label_create(parser, &parser->previous);
13256 pm_token_t operator = not_provided(parser);
13257
13258 first_assoc = (pm_node_t *) pm_assoc_node_create(parser, (pm_node_t *) key, &operator, NULL);
13259 break;
13260 }
13262 first_assoc = parse_pattern_keyword_rest(parser);
13263 break;
13264 case PM_TOKEN_STRING_BEGIN: {
13265 pm_node_t *key = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_HASH_KEY);
13266 pm_token_t operator = not_provided(parser);
13267
13268 if (!pm_symbol_node_label_p(key)) {
13269 pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_LABEL);
13270 }
13271
13272 first_assoc = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, NULL);
13273 break;
13274 }
13275 default: {
13276 parser_lex(parser);
13277 pm_parser_err_previous(parser, PM_ERR_PATTERN_HASH_KEY);
13278
13279 pm_missing_node_t *key = pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
13280 pm_token_t operator = not_provided(parser);
13281
13282 first_assoc = (pm_node_t *) pm_assoc_node_create(parser, (pm_node_t *) key, &operator, NULL);
13283 break;
13284 }
13285 }
13286
13287 node = parse_pattern_hash(parser, first_assoc);
13288
13289 accept1(parser, PM_TOKEN_NEWLINE);
13290 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE);
13291 pm_token_t closing = parser->previous;
13292
13293 node->base.location.start = opening.start;
13294 node->base.location.end = closing.end;
13295
13296 node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
13297 node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
13298 }
13299
13300 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
13301 return (pm_node_t *) node;
13302 }
13303 case PM_TOKEN_UDOT_DOT:
13304 case PM_TOKEN_UDOT_DOT_DOT: {
13305 pm_token_t operator = parser->current;
13306 parser_lex(parser);
13307
13308 // Since we have a unary range operator, we need to parse the subsequent
13309 // expression as the right side of the range.
13310 switch (parser->current.type) {
13311 case PM_CASE_PRIMITIVE: {
13312 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
13313 return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
13314 }
13315 default: {
13316 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
13317 pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
13318 return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
13319 }
13320 }
13321 }
13322 case PM_CASE_PRIMITIVE: {
13323 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, false, diag_id);
13324
13325 // Now that we have a primitive, we need to check if it's part of a range.
13326 if (accept2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
13327 pm_token_t operator = parser->previous;
13328
13329 // Now that we have the operator, we need to check if this is followed
13330 // by another expression. If it is, then we will create a full range
13331 // node. Otherwise, we'll create an endless range.
13332 switch (parser->current.type) {
13333 case PM_CASE_PRIMITIVE: {
13334 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
13335 return (pm_node_t *) pm_range_node_create(parser, node, &operator, right);
13336 }
13337 default:
13338 return (pm_node_t *) pm_range_node_create(parser, node, &operator, NULL);
13339 }
13340 }
13341
13342 return node;
13343 }
13344 case PM_TOKEN_CARET: {
13345 parser_lex(parser);
13346 pm_token_t operator = parser->previous;
13347
13348 // At this point we have a pin operator. We need to check the subsequent
13349 // expression to determine if it's a variable or an expression.
13350 switch (parser->current.type) {
13351 case PM_TOKEN_IDENTIFIER: {
13352 parser_lex(parser);
13353 pm_node_t *variable = (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, 0);
13354
13355 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
13356 }
13358 parser_lex(parser);
13359 pm_node_t *variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
13360
13361 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
13362 }
13364 parser_lex(parser);
13365 pm_node_t *variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
13366
13367 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
13368 }
13370 parser_lex(parser);
13371 pm_node_t *variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
13372
13373 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
13374 }
13376 parser_lex(parser);
13377 pm_node_t *variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
13378
13379 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
13380 }
13382 parser_lex(parser);
13383 pm_node_t *variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
13384
13385 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
13386 }
13388 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
13389 parser->pattern_matching_newlines = false;
13390
13391 pm_token_t lparen = parser->current;
13392 parser_lex(parser);
13393
13394 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
13395 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
13396
13397 accept1(parser, PM_TOKEN_NEWLINE);
13398 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
13399 return (pm_node_t *) pm_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous);
13400 }
13401 default: {
13402 // If we get here, then we have a pin operator followed by something
13403 // not understood. We'll create a missing node and return that.
13404 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
13405 pm_node_t *variable = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
13406 return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
13407 }
13408 }
13409 }
13410 case PM_TOKEN_UCOLON_COLON: {
13411 pm_token_t delimiter = parser->current;
13412 parser_lex(parser);
13413
13414 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
13415 pm_node_t *child = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
13416 pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, child);
13417
13418 return parse_pattern_constant_path(parser, (pm_node_t *)node);
13419 }
13420 case PM_TOKEN_CONSTANT: {
13421 pm_token_t constant = parser->current;
13422 parser_lex(parser);
13423
13424 pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
13425 return parse_pattern_constant_path(parser, node);
13426 }
13427 default:
13428 pm_parser_err_current(parser, diag_id);
13429 return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
13430 }
13431}
13432
13437static pm_node_t *
13438parse_pattern_primitives(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
13439 pm_node_t *node = NULL;
13440
13441 do {
13442 pm_token_t operator = parser->previous;
13443
13444 switch (parser->current.type) {
13448 case PM_TOKEN_CARET:
13449 case PM_TOKEN_CONSTANT:
13451 case PM_TOKEN_UDOT_DOT:
13453 case PM_CASE_PRIMITIVE: {
13454 if (node == NULL) {
13455 node = parse_pattern_primitive(parser, diag_id);
13456 } else {
13457 pm_node_t *right = parse_pattern_primitive(parser, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE);
13458 node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
13459 }
13460
13461 break;
13462 }
13464 parser_lex(parser);
13465 if (node != NULL) {
13466 pm_node_destroy(parser, node);
13467 }
13468 node = parse_pattern(parser, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN);
13469
13470 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
13471 break;
13472 }
13473 default: {
13474 pm_parser_err_current(parser, diag_id);
13475 pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
13476
13477 if (node == NULL) {
13478 node = right;
13479 } else {
13480 node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
13481 }
13482
13483 break;
13484 }
13485 }
13486 } while (accept1(parser, PM_TOKEN_PIPE));
13487
13488 // If we have an =>, then we are assigning this pattern to a variable.
13489 // In this case we should create an assignment node.
13490 while (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
13491 pm_token_t operator = parser->previous;
13492
13493 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_PATTERN_IDENT_AFTER_HROCKET);
13494 pm_token_t identifier = parser->previous;
13495 int depth = pm_parser_local_depth(parser, &identifier);
13496 if (depth < 0) {
13497 depth = 0;
13498 pm_parser_local_add_token(parser, &identifier);
13499 }
13500
13501 pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create_depth(parser, &identifier, (uint32_t) depth);
13502 node = (pm_node_t *) pm_capture_pattern_node_create(parser, node, target, &operator);
13503 }
13504
13505 return node;
13506}
13507
// Parses a pattern used in pattern matching and returns the resulting node.
//
// - top_pattern: when true, a leading `*` rest and a comma-separated list of
//   sub-patterns are accepted here and turned into an array or find pattern.
//   When false, only a single pattern is parsed.
// - diag_id: the diagnostic passed to parse_pattern_primitives() for the first
//   sub-pattern.
//
// A pattern that starts with a label, a `**`, or a dynamic label symbol is
// handed to parse_pattern_hash() and returned as a hash pattern.
//
// NOTE(review): one line appears to be missing from this copy (the embedded
// source numbering skips 13561, see the note inside the comma loop). As
// written the braces do not balance; restore that line from the upstream
// source.
13511static pm_node_t *
13512parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id) {
13513 pm_node_t *node = NULL;
13514
// leading_rest: the pattern started with a `*` rest.
// trailing_rest: a `*` rest has been seen after the first comma.
13515 bool leading_rest = false;
13516 bool trailing_rest = false;
13517
13518 switch (parser->current.type) {
13519 case PM_TOKEN_LABEL: {
13520 parser_lex(parser);
13521 pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
13522 pm_token_t operator = not_provided(parser);
13523
13524 return (pm_node_t *) parse_pattern_hash(parser, (pm_node_t *) pm_assoc_node_create(parser, key, &operator, NULL));
13525 }
13526 case PM_TOKEN_USTAR_STAR: {
13527 node = parse_pattern_keyword_rest(parser);
13528 return (pm_node_t *) parse_pattern_hash(parser, node);
13529 }
13530 case PM_TOKEN_USTAR: {
// A leading `*` is only treated as a rest pattern at the top level;
// otherwise control falls through to the default branch.
13531 if (top_pattern) {
13532 parser_lex(parser);
13533 node = (pm_node_t *) parse_pattern_rest(parser);
13534 leading_rest = true;
13535 break;
13536 }
13537 }
13538 /* fallthrough */
13539 default:
13540 node = parse_pattern_primitives(parser, diag_id);
13541 break;
13542 }
13543
13544 // If we got a dynamic label symbol, then we need to treat it like the
13545 // beginning of a hash pattern.
13546 if (pm_symbol_node_label_p(node)) {
13547 pm_token_t operator = not_provided(parser);
13548 return (pm_node_t *) parse_pattern_hash(parser, (pm_node_t *) pm_assoc_node_create(parser, node, &operator, NULL));
13549 }
13550
13551 if (top_pattern && match1(parser, PM_TOKEN_COMMA)) {
13552 // If we have a comma, then we are now parsing either an array pattern or a
13553 // find pattern. We need to parse all of the patterns, put them into a big
13554 // list, and then determine which type of node we have.
13555 pm_node_list_t nodes = { 0 };
13556 pm_node_list_append(&nodes, node);
13557
13558 // Gather up all of the patterns into the list.
13559 while (accept1(parser, PM_TOKEN_COMMA)) {
13560 // Break early here in case we have a trailing comma.
// NOTE(review): the `if (...) {` that guards this trailing-comma branch
// appears to be missing (13561). Inside it, an implicit rest node located at
// the comma is appended before leaving the loop.
13562 node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
13563 pm_node_list_append(&nodes, node);
13564 break;
13565 }
13566
13567 if (accept1(parser, PM_TOKEN_USTAR)) {
13568 node = (pm_node_t *) parse_pattern_rest(parser);
13569
13570 // If we have already parsed a splat pattern, then this is an error. We
13571 // will continue to parse the rest of the patterns, but we will indicate
13572 // it as an error.
13573 if (trailing_rest) {
13574 pm_parser_err_previous(parser, PM_ERR_PATTERN_REST);
13575 }
13576
13577 trailing_rest = true;
13578 } else {
13579 node = parse_pattern_primitives(parser, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA);
13580 }
13581
13582 pm_node_list_append(&nodes, node);
13583 }
13584
13585 // If the first pattern and the last pattern are rest patterns, then we will
13586 // call this a find pattern, regardless of how many rest patterns are in
13587 // between because we know we already added the appropriate errors.
13588 // Otherwise we will create an array pattern.
13589 if (PM_NODE_TYPE_P(nodes.nodes[0], PM_SPLAT_NODE) && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
13590 node = (pm_node_t *) pm_find_pattern_node_create(parser, &nodes);
13591 } else {
13592 node = (pm_node_t *) pm_array_pattern_node_node_list_create(parser, &nodes);
13593 }
13594
// The temporary list's backing array is freed once the pattern node has been
// created from it.
13595 free(nodes.nodes);
13596 } else if (leading_rest) {
13597 // Otherwise, if we parsed a single splat pattern, then we know we have an
13598 // array pattern, so we can go ahead and create that node.
13599 node = (pm_node_t *) pm_array_pattern_node_rest_create(parser, node);
13600 }
13601
13602 return node;
13603}
13604
13610static inline void
13611parse_negative_numeric(pm_node_t *node) {
13612 switch (PM_NODE_TYPE(node)) {
13613 case PM_INTEGER_NODE:
13614 case PM_FLOAT_NODE:
13615 node->location.start--;
13616 break;
13617 case PM_RATIONAL_NODE:
13618 node->location.start--;
13619 parse_negative_numeric(((pm_rational_node_t *) node)->numeric);
13620 break;
13621 case PM_IMAGINARY_NODE:
13622 node->location.start--;
13623 parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
13624 break;
13625 default:
13626 assert(false && "unreachable");
13627 break;
13628 }
13629}
13630
13634static pm_token_t
13635parse_strings_empty_content(const uint8_t *location) {
13636 return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
13637}
13638
13642static inline pm_node_t *
13643parse_strings(pm_parser_t *parser, pm_node_t *current) {
13644 assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
13645
13646 bool concating = false;
13647 bool state_is_arg_labeled = lex_state_p(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
13648
13649 while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
13650 pm_node_t *node = NULL;
13651
13652 // Here we have found a string literal. We'll parse it and add it to
13653 // the list of strings.
13654 const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
13655 assert(lex_mode->mode == PM_LEX_STRING);
13656 bool lex_interpolation = lex_mode->as.string.interpolation;
13657
13658 pm_token_t opening = parser->current;
13659 parser_lex(parser);
13660
13661 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
13662 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
13663 // If we get here, then we have an end immediately after a
13664 // start. In that case we'll create an empty content token and
13665 // return an uninterpolated string.
13666 pm_token_t content = parse_strings_empty_content(parser->previous.start);
13667 pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
13668
13669 pm_string_shared_init(&string->unescaped, content.start, content.end);
13670 node = (pm_node_t *) string;
13671 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
13672 // If we get here, then we have an end of a label immediately
13673 // after a start. In that case we'll create an empty symbol
13674 // node.
13675 pm_token_t opening = not_provided(parser);
13676 pm_token_t content = parse_strings_empty_content(parser->previous.start);
13677 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
13678
13679 pm_string_shared_init(&symbol->unescaped, content.start, content.end);
13680 node = (pm_node_t *) symbol;
13681 } else if (!lex_interpolation) {
13682 // If we don't accept interpolation then we expect the string to
13683 // start with a single string content node.
13684 pm_string_t unescaped;
13685 pm_token_t content;
13686 if (match1(parser, PM_TOKEN_EOF)) {
13687 unescaped = PM_STRING_EMPTY;
13688 content = not_provided(parser);
13689 } else {
13690 unescaped = parser->current_string;
13691 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
13692 content = parser->previous;
13693 }
13694
13695 // It is unfortunately possible to have multiple string content
13696 // nodes in a row in the case that there's heredoc content in
13697 // the middle of the string, like this cursed example:
13698 //
13699 // <<-END+'b
13700 // a
13701 // END
13702 // c'+'d'
13703 //
13704 // In that case we need to switch to an interpolated string to
13705 // be able to contain all of the parts.
13706 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
13707 pm_node_list_t parts = { 0 };
13708
13709 pm_token_t delimiters = not_provided(parser);
13710 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
13711 pm_node_list_append(&parts, part);
13712
13713 do {
13714 part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
13715 pm_node_list_append(&parts, part);
13716 parser_lex(parser);
13717 } while (match1(parser, PM_TOKEN_STRING_CONTENT));
13718
13719 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
13720 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
13721 } else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
13722 node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
13723 } else if (match1(parser, PM_TOKEN_EOF)) {
13724 pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_TERM);
13725 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
13726 } else {
13727 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
13728 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
13729 }
13730 } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
13731 // In this case we've hit string content so we know the string
13732 // at least has something in it. We'll need to check if the
13733 // following token is the end (in which case we can return a
13734 // plain string) or if it's not then it has interpolation.
13735 pm_token_t content = parser->current;
13736 pm_string_t unescaped = parser->current_string;
13737 parser_lex(parser);
13738
13739 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
13740 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
13741 pm_node_flag_set(node, parse_unescaped_encoding(parser));
13742 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
13743 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
13744 node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
13745 } else {
13746 // If we get here, then we have interpolation so we'll need
13747 // to create a string or symbol node with interpolation.
13748 pm_node_list_t parts = { 0 };
13749 pm_token_t string_opening = not_provided(parser);
13750 pm_token_t string_closing = not_provided(parser);
13751
13752 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
13753 pm_node_flag_set(part, parse_unescaped_encoding(parser));
13754 pm_node_list_append(&parts, part);
13755
13756 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
13757 if ((part = parse_string_part(parser)) != NULL) {
13758 pm_node_list_append(&parts, part);
13759 }
13760 }
13761
13762 if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
13763 node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
13764 } else if (match1(parser, PM_TOKEN_EOF)) {
13765 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
13766 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
13767 } else {
13768 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
13769 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
13770 }
13771 }
13772 } else {
13773 // If we get here, then the first part of the string is not plain
13774 // string content, in which case we need to parse the string as an
13775 // interpolated string.
13776 pm_node_list_t parts = { 0 };
13777 pm_node_t *part;
13778
13779 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
13780 if ((part = parse_string_part(parser)) != NULL) {
13781 pm_node_list_append(&parts, part);
13782 }
13783 }
13784
13785 if (accept1(parser, PM_TOKEN_LABEL_END)) {
13786 node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
13787 } else if (match1(parser, PM_TOKEN_EOF)) {
13788 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
13789 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
13790 } else {
13791 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
13792 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
13793 }
13794 }
13795
13796 if (current == NULL) {
13797 // If the node we just parsed is a symbol node, then we can't
13798 // concatenate it with anything else, so we can now return that
13799 // node.
13801 return node;
13802 }
13803
13804 // If we don't already have a node, then it's fine and we can just
13805 // set the result to be the node we just parsed.
13806 current = node;
13807 } else {
13808 // Otherwise we need to check the type of the node we just parsed.
13809 // If it cannot be concatenated with the previous node, then we'll
13810 // need to add a syntax error.
13812 pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
13813 }
13814
13815 // If we haven't already created our container for concatenation,
13816 // we'll do that now.
13817 if (!concating) {
13818 concating = true;
13819 pm_token_t bounds = not_provided(parser);
13820
13821 pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
13822 pm_interpolated_string_node_append(container, current);
13823 current = (pm_node_t *) container;
13824 }
13825
13826 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
13827 }
13828 }
13829
13830 return current;
13831}
13832
13836static inline pm_node_t *
13837parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call) {
13838 switch (parser->current.type) {
13840 parser_lex(parser);
13841
13842 pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
13843 pm_accepts_block_stack_push(parser, true);
13844 bool parsed_bare_hash = false;
13845
13846 while (!match2(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_EOF)) {
13847 // Handle the case where we don't have a comma and we have a
13848 // newline followed by a right bracket.
13849 if (accept1(parser, PM_TOKEN_NEWLINE) && match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
13850 break;
13851 }
13852
13853 if (pm_array_node_size(array) != 0) {
13854 expect1(parser, PM_TOKEN_COMMA, PM_ERR_ARRAY_SEPARATOR);
13855 }
13856
13857 // If we have a right bracket immediately following a comma,
13858 // this is allowed since it's a trailing comma. In this case we
13859 // can break out of the loop.
13860 if (match1(parser, PM_TOKEN_BRACKET_RIGHT)) break;
13861
13862 pm_node_t *element;
13863
13864 if (accept1(parser, PM_TOKEN_USTAR)) {
13865 pm_token_t operator = parser->previous;
13866 pm_node_t *expression = NULL;
13867
13868 if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
13869 if (pm_parser_local_depth(parser, &parser->previous) == -1) {
13870 pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
13871 }
13872 } else {
13873 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR);
13874 }
13875
13876 element = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
13877 } else if (match2(parser, PM_TOKEN_LABEL, PM_TOKEN_USTAR_STAR)) {
13878 if (parsed_bare_hash) {
13879 pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH);
13880 }
13881
13882 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
13883 element = (pm_node_t *)hash;
13884
13886 parse_assocs(parser, (pm_node_t *) hash);
13887 }
13888
13889 parsed_bare_hash = true;
13890 } else {
13891 element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_ARRAY_EXPRESSION);
13892
13893 if (pm_symbol_node_label_p(element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
13894 if (parsed_bare_hash) {
13895 pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH);
13896 }
13897
13898 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
13899
13900 pm_token_t operator;
13901 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
13902 operator = parser->previous;
13903 } else {
13904 operator = not_provided(parser);
13905 }
13906
13907 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_HASH_VALUE);
13908 pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, element, &operator, value);
13909 pm_keyword_hash_node_elements_append(hash, assoc);
13910
13911 element = (pm_node_t *)hash;
13912 if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
13913 parse_assocs(parser, (pm_node_t *) hash);
13914 }
13915
13916 parsed_bare_hash = true;
13917 }
13918 }
13919
13920 pm_array_node_elements_append(array, element);
13921 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
13922 }
13923
13924 accept1(parser, PM_TOKEN_NEWLINE);
13925 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_ARRAY_TERM);
13926 pm_array_node_close_set(array, &parser->previous);
13927 pm_accepts_block_stack_pop(parser);
13928
13929 return (pm_node_t *) array;
13930 }
13933 pm_token_t opening = parser->current;
13934 parser_lex(parser);
13935 while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
13936
13937 // If this is the end of the file or we match a right parenthesis, then
13938 // we have an empty parentheses node, and we can immediately return.
13939 if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) {
13940 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
13941 return (pm_node_t *) pm_parentheses_node_create(parser, &opening, NULL, &parser->previous);
13942 }
13943
13944 // Otherwise, we're going to parse the first statement in the list
13945 // of statements within the parentheses.
13946 pm_accepts_block_stack_push(parser, true);
13947 context_push(parser, PM_CONTEXT_PARENS);
13948 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_CANNOT_PARSE_EXPRESSION);
13949 context_pop(parser);
13950
13951 // Determine if this statement is followed by a terminator. In the
13952 // case of a single statement, this is fine. But in the case of
13953 // multiple statements it's required.
13954 bool terminator_found = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
13955 if (terminator_found) {
13956 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13957 }
13958
13959 // If we hit a right parenthesis, then we're done parsing the
13960 // parentheses node, and we can check which kind of node we should
13961 // return.
13962 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
13964 lex_state_set(parser, PM_LEX_STATE_ENDARG);
13965 }
13966 parser_lex(parser);
13967 pm_accepts_block_stack_pop(parser);
13968
13969 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
13970 // If we have a single statement and are ending on a right
13971 // parenthesis, then we need to check if this is possibly a
13972 // multiple target node.
13973 pm_multi_target_node_t *multi_target;
13974
13975 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.start == NULL) {
13976 multi_target = (pm_multi_target_node_t *) statement;
13977 } else {
13978 multi_target = pm_multi_target_node_create(parser);
13979 pm_multi_target_node_targets_append(parser, multi_target, statement);
13980 }
13981
13982 pm_location_t lparen_loc = PM_LOCATION_TOKEN_VALUE(&opening);
13983 pm_location_t rparen_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
13984
13985 multi_target->lparen_loc = lparen_loc;
13986 multi_target->rparen_loc = rparen_loc;
13987 multi_target->base.location.start = lparen_loc.start;
13988 multi_target->base.location.end = rparen_loc.end;
13989
13990 if (match1(parser, PM_TOKEN_COMMA)) {
13991 if (binding_power == PM_BINDING_POWER_STATEMENT) {
13992 return parse_targets_validate(parser, (pm_node_t *) multi_target, PM_BINDING_POWER_INDEX);
13993 }
13994 return (pm_node_t *) multi_target;
13995 }
13996
13997 return parse_target_validate(parser, (pm_node_t *) multi_target);
13998 }
13999
14000 // If we have a single statement and are ending on a right parenthesis
14001 // and we didn't return a multiple assignment node, then we can return a
14002 // regular parentheses node now.
14003 pm_statements_node_t *statements = pm_statements_node_create(parser);
14004 pm_statements_node_body_append(statements, statement);
14005
14006 return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous);
14007 }
14008
14009 // If we have more than one statement in the set of parentheses,
14010 // then we are going to parse all of them as a list of statements.
14011 // We'll do that here.
14012 context_push(parser, PM_CONTEXT_PARENS);
14013 pm_statements_node_t *statements = pm_statements_node_create(parser);
14014 pm_statements_node_body_append(statements, statement);
14015
14016 // If we didn't find a terminator and we didn't find a right
14017 // parenthesis, then this is a syntax error.
14018 if (!terminator_found) {
14019 pm_parser_err(parser, parser->current.start, parser->current.start, PM_ERR_EXPECT_EOL_AFTER_STATEMENT);
14020 }
14021
14022 // Parse each statement within the parentheses.
14023 while (true) {
14024 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_CANNOT_PARSE_EXPRESSION);
14025 pm_statements_node_body_append(statements, node);
14026
14027 // If we're recovering from a syntax error, then we need to stop
14028 // parsing the statements now.
14029 if (parser->recovering) {
14030 // If this is the level of context where the recovery has
14031 // happened, then we can mark the parser as done recovering.
14032 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false;
14033 break;
14034 }
14035
14036 // If we couldn't parse an expression at all, then we need to
14037 // bail out of the loop.
14038 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) break;
14039
14040 // If we successfully parsed a statement, then we are going to
14041 // need terminator to delimit them.
14042 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
14043 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
14044 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
14045 } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14046 break;
14047 } else {
14048 pm_parser_err(parser, parser->current.start, parser->current.start, PM_ERR_EXPECT_EOL_AFTER_STATEMENT);
14049 }
14050 }
14051
14052 context_pop(parser);
14053 pm_accepts_block_stack_pop(parser);
14054 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
14055
14056 return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous);
14057 }
14058 case PM_TOKEN_BRACE_LEFT: {
14059 pm_accepts_block_stack_push(parser, true);
14060 parser_lex(parser);
14061 pm_hash_node_t *node = pm_hash_node_create(parser, &parser->previous);
14062
14063 if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
14064 parse_assocs(parser, (pm_node_t *) node);
14065 accept1(parser, PM_TOKEN_NEWLINE);
14066 }
14067
14068 pm_accepts_block_stack_pop(parser);
14069 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM);
14070 pm_hash_node_closing_loc_set(node, &parser->previous);
14071
14072 return (pm_node_t *) node;
14073 }
14075 parser_lex(parser);
14076
14077 pm_token_t opening = parser->previous;
14078 opening.type = PM_TOKEN_STRING_BEGIN;
14079 opening.end = opening.start + 1;
14080
14081 pm_token_t content = parser->previous;
14082 content.type = PM_TOKEN_STRING_CONTENT;
14083 content.start = content.start + 1;
14084
14085 pm_token_t closing = not_provided(parser);
14086 pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &content, &closing);
14087 pm_node_flag_set(node, parse_unescaped_encoding(parser));
14088
14089 // Characters can be followed by strings in which case they are
14090 // automatically concatenated.
14091 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
14092 return parse_strings(parser, node);
14093 }
14094
14095 return node;
14096 }
14098 parser_lex(parser);
14099 pm_node_t *node = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
14100
14101 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
14102 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
14103 }
14104
14105 return node;
14106 }
14107 case PM_TOKEN_CONSTANT: {
14108 parser_lex(parser);
14109 pm_token_t constant = parser->previous;
14110
14111 // If a constant is immediately followed by parentheses, then this is in
14112 // fact a method call, not a constant read.
14113 if (
14114 match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
14115 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
14116 (pm_accepts_block_stack_p(parser) && match2(parser, PM_TOKEN_KEYWORD_DO, PM_TOKEN_BRACE_LEFT))
14117 ) {
14118 pm_arguments_t arguments = { 0 };
14119 parse_arguments_list(parser, &arguments, true, accepts_command_call);
14120 return (pm_node_t *) pm_call_node_fcall_create(parser, &constant, &arguments);
14121 }
14122
14123 pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
14124
14125 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
14126 // If we get here, then we have a comma immediately following a
14127 // constant, so we're going to parse this as a multiple assignment.
14128 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
14129 }
14130
14131 return node;
14132 }
14133 case PM_TOKEN_UCOLON_COLON: {
14134 parser_lex(parser);
14135
14136 pm_token_t delimiter = parser->previous;
14137 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
14138
14139 pm_node_t *constant = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
14140 pm_node_t *node = (pm_node_t *)pm_constant_path_node_create(parser, NULL, &delimiter, constant);
14141
14142 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
14143 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
14144 }
14145
14146 return node;
14147 }
14148 case PM_TOKEN_UDOT_DOT:
14149 case PM_TOKEN_UDOT_DOT_DOT: {
14150 pm_token_t operator = parser->current;
14151 parser_lex(parser);
14152
14153 pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
14154 return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
14155 }
14156 case PM_TOKEN_FLOAT:
14157 parser_lex(parser);
14158 return (pm_node_t *) pm_float_node_create(parser, &parser->previous);
14160 parser_lex(parser);
14161 return (pm_node_t *) pm_float_node_imaginary_create(parser, &parser->previous);
14163 parser_lex(parser);
14164 return (pm_node_t *) pm_float_node_rational_create(parser, &parser->previous);
14166 parser_lex(parser);
14167 return (pm_node_t *) pm_float_node_rational_imaginary_create(parser, &parser->previous);
14169 parser_lex(parser);
14170 pm_node_t *node = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
14171
14172 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
14173 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
14174 }
14175
14176 return node;
14177 }
14179 parser_lex(parser);
14180 pm_node_t *node = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
14181
14182 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
14183 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
14184 }
14185
14186 return node;
14187 }
14189 parser_lex(parser);
14190 pm_node_t *node = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
14191
14192 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
14193 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
14194 }
14195
14196 return node;
14197 }
14199 case PM_TOKEN_METHOD_NAME: {
14200 parser_lex(parser);
14201 pm_token_t identifier = parser->previous;
14202 pm_node_t *node = parse_variable_call(parser);
14203
14204 if (PM_NODE_TYPE_P(node, PM_CALL_NODE)) {
14205 // If parse_variable_call returned with a call node, then we
14206 // know the identifier is not in the local table. In that case
14207 // we need to check if there are arguments following the
14208 // identifier.
14209 pm_call_node_t *call = (pm_call_node_t *) node;
14210 pm_arguments_t arguments = { 0 };
14211
14212 if (parse_arguments_list(parser, &arguments, true, accepts_command_call)) {
14213 // Since we found arguments, we need to turn off the
14214 // variable call bit in the flags.
14215 pm_node_flag_unset((pm_node_t *)call, PM_CALL_NODE_FLAGS_VARIABLE_CALL);
14216
14217 call->opening_loc = arguments.opening_loc;
14218 call->arguments = arguments.arguments;
14219 call->closing_loc = arguments.closing_loc;
14220 call->block = arguments.block;
14221
14222 if (arguments.block != NULL) {
14223 call->base.location.end = arguments.block->location.end;
14224 } else if (arguments.closing_loc.start == NULL) {
14225 if (arguments.arguments != NULL) {
14226 call->base.location.end = arguments.arguments->base.location.end;
14227 } else {
14228 call->base.location.end = call->message_loc.end;
14229 }
14230 } else {
14231 call->base.location.end = arguments.closing_loc.end;
14232 }
14233 }
14234 } else {
14235 // Otherwise, we know the identifier is in the local table. This
14236 // can still be a method call if it is followed by arguments or
14237 // a block, so we need to check for that here.
14238 if (
14239 (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
14240 (pm_accepts_block_stack_p(parser) && match2(parser, PM_TOKEN_KEYWORD_DO, PM_TOKEN_BRACE_LEFT))
14241 ) {
14242 pm_arguments_t arguments = { 0 };
14243 parse_arguments_list(parser, &arguments, true, accepts_command_call);
14244
14245 pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
14246 pm_node_destroy(parser, node);
14247 return (pm_node_t *) fcall;
14248 }
14249 }
14250
14251 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
14252 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
14253 }
14254
14255 return node;
14256 }
14258 // Here we have found a heredoc. We'll parse it and add it to the
14259 // list of strings.
14260 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
14261 assert(lex_mode->mode == PM_LEX_HEREDOC);
14262 pm_heredoc_quote_t quote = lex_mode->as.heredoc.quote;
14263 pm_heredoc_indent_t indent = lex_mode->as.heredoc.indent;
14264
14265 parser_lex(parser);
14266 pm_token_t opening = parser->previous;
14267
14268 pm_node_t *node;
14269 pm_node_t *part;
14270
14271 if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
14272 // If we get here, then we have an empty heredoc. We'll create
14273 // an empty content token and return an empty string node.
14274 lex_mode_pop(parser);
14275 expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
14276 pm_token_t content = parse_strings_empty_content(parser->previous.start);
14277
14278 if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
14279 node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
14280 } else {
14281 node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
14282 }
14283
14284 node->location.end = opening.end;
14285 } else if ((part = parse_string_part(parser)) == NULL) {
14286 // If we get here, then we tried to find something in the
14287 // heredoc but couldn't actually parse anything, so we'll just
14288 // return a missing node.
14289 node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
14290 } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
14291 // If we get here, then the part that we parsed was plain string
14292 // content and we're at the end of the heredoc, so we can return
14293 // just a string node with the heredoc opening and closing as
14294 // its opening and closing.
14295 pm_node_flag_set(part, parse_unescaped_encoding(parser));
14296 pm_string_node_t *cast = (pm_string_node_t *) part;
14297
14298 cast->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
14299 cast->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->current);
14300 cast->base.location = cast->opening_loc;
14301
14302 if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
14303 assert(sizeof(pm_string_node_t) == sizeof(pm_x_string_node_t));
14304 cast->base.type = PM_X_STRING_NODE;
14305 }
14306
14307 size_t common_whitespace = lex_mode->as.heredoc.common_whitespace;
14308 if (indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
14309 parse_heredoc_dedent_string(&cast->unescaped, common_whitespace);
14310 }
14311
14312 node = (pm_node_t *) cast;
14313 lex_mode_pop(parser);
14314 expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
14315 } else {
14316 // If we get here, then we have multiple parts in the heredoc,
14317 // so we'll need to create an interpolated string node to hold
14318 // them all.
14319 pm_node_list_t parts = { 0 };
14320 pm_node_list_append(&parts, part);
14321
14322 while (!match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
14323 if ((part = parse_string_part(parser)) != NULL) {
14324 pm_node_list_append(&parts, part);
14325 }
14326 }
14327
14328 size_t common_whitespace = lex_mode->as.heredoc.common_whitespace;
14329
14330 // Now that we have all of the parts, create the correct type of
14331 // interpolated node.
14332 if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
14333 pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
14334 cast->parts = parts;
14335
14336 lex_mode_pop(parser);
14337 expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
14338
14339 pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
14340 cast->base.location = cast->opening_loc;
14341 node = (pm_node_t *) cast;
14342 } else {
14343 pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
14344
14345 lex_mode_pop(parser);
14346 expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
14347
14348 pm_interpolated_string_node_closing_set(cast, &parser->previous);
14349 cast->base.location = cast->opening_loc;
14350 node = (pm_node_t *) cast;
14351 }
14352
14353 // If this is a heredoc that is indented with a ~, then we need
14354 // to dedent each line by the common leading whitespace.
14355 if (indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
14356 pm_node_list_t *nodes;
14357 if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
14358 nodes = &((pm_interpolated_x_string_node_t *) node)->parts;
14359 } else {
14360 nodes = &((pm_interpolated_string_node_t *) node)->parts;
14361 }
14362
14363 parse_heredoc_dedent(parser, nodes, common_whitespace);
14364 }
14365 }
14366
14367 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
14368 return parse_strings(parser, node);
14369 }
14370
14371 return node;
14372 }
14374 parser_lex(parser);
14375 pm_node_t *node = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
14376
14377 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
14378 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX);
14379 }
14380
14381 return node;
14382 }
14383 case PM_TOKEN_INTEGER: {
14384 pm_node_flags_t base = parser->integer_base;
14385 parser_lex(parser);
14386 return (pm_node_t *) pm_integer_node_create(parser, base, &parser->previous);
14387 }
14389 pm_node_flags_t base = parser->integer_base;
14390 parser_lex(parser);
14391 return (pm_node_t *) pm_integer_node_imaginary_create(parser, base, &parser->previous);
14392 }
14394 pm_node_flags_t base = parser->integer_base;
14395 parser_lex(parser);
14396 return (pm_node_t *) pm_integer_node_rational_create(parser, base, &parser->previous);
14397 }
14399 pm_node_flags_t base = parser->integer_base;
14400 parser_lex(parser);
14401 return (pm_node_t *) pm_integer_node_rational_imaginary_create(parser, base, &parser->previous);
14402 }
14404 parser_lex(parser);
14405 return (pm_node_t *) pm_source_encoding_node_create(parser, &parser->previous);
14407 parser_lex(parser);
14408 return (pm_node_t *) pm_source_file_node_create(parser, &parser->previous);
14410 parser_lex(parser);
14411 return (pm_node_t *) pm_source_line_node_create(parser, &parser->previous);
14413 if (binding_power != PM_BINDING_POWER_STATEMENT) {
14414 pm_parser_err_current(parser, PM_ERR_STATEMENT_ALIAS);
14415 }
14416
14417 parser_lex(parser);
14418 pm_token_t keyword = parser->previous;
14419
14420 pm_node_t *new_name = parse_alias_argument(parser, true);
14421 pm_node_t *old_name = parse_alias_argument(parser, false);
14422
14423 switch (PM_NODE_TYPE(new_name)) {
14429 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
14430 }
14431 } else {
14432 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
14433 }
14434
14435 return (pm_node_t *) pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name);
14436 }
14437 case PM_SYMBOL_NODE:
14440 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
14441 }
14442 }
14443 /* fallthrough */
14444 default:
14445 return (pm_node_t *) pm_alias_method_node_create(parser, &keyword, new_name, old_name);
14446 }
14447 }
14448 case PM_TOKEN_KEYWORD_CASE: {
14449 parser_lex(parser);
14450 pm_token_t case_keyword = parser->previous;
14451 pm_node_t *predicate = NULL;
14452
14453 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
14454 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
14455 predicate = NULL;
14456 } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) {
14457 predicate = NULL;
14458 } else if (!token_begins_expression_p(parser->current.type)) {
14459 predicate = NULL;
14460 } else {
14461 predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CASE_EXPRESSION_AFTER_CASE);
14462 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
14463 }
14464
14465 if (accept1(parser, PM_TOKEN_KEYWORD_END)) {
14466 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
14467 return (pm_node_t *) pm_case_node_create(parser, &case_keyword, predicate, &parser->previous);
14468 }
14469
14470 // At this point we can create a case node, though we don't yet know if it
14471 // is a case-in or case-when node.
14472 pm_token_t end_keyword = not_provided(parser);
14473 pm_node_t *node;
14474
14475 if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
14476 pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, &end_keyword);
14477
14478 // At this point we've seen a when keyword, so we know this is a
14479 // case-when node. We will continue to parse the when nodes until we hit
14480 // the end of the list.
14481 while (accept1(parser, PM_TOKEN_KEYWORD_WHEN)) {
14482 pm_token_t when_keyword = parser->previous;
14483 pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword);
14484
14485 do {
14486 if (accept1(parser, PM_TOKEN_USTAR)) {
14487 pm_token_t operator = parser->previous;
14488 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
14489
14490 pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
14491 pm_when_node_conditions_append(when_node, (pm_node_t *) splat_node);
14492
14493 if (PM_NODE_TYPE_P(expression, PM_MISSING_NODE)) break;
14494 } else {
14495 pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_CASE_EXPRESSION_AFTER_WHEN);
14496 pm_when_node_conditions_append(when_node, condition);
14497
14498 if (PM_NODE_TYPE_P(condition, PM_MISSING_NODE)) break;
14499 }
14500 } while (accept1(parser, PM_TOKEN_COMMA));
14501
14502 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
14503 accept1(parser, PM_TOKEN_KEYWORD_THEN);
14504 } else {
14505 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
14506 }
14507
14509 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN);
14510 if (statements != NULL) {
14511 pm_when_node_statements_set(when_node, statements);
14512 }
14513 }
14514
14515 pm_case_node_condition_append(case_node, (pm_node_t *) when_node);
14516 }
14517
14518 // If we didn't parse any conditions (in or when) then we need
14519 // to indicate that we have an error.
14520 if (case_node->conditions.size == 0) {
14521 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
14522 }
14523
14524 node = (pm_node_t *) case_node;
14525 } else {
14526 pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate, &end_keyword);
14527
14528 // If this is a case-match node (i.e., it is a pattern matching
14529 // case statement) then we must have a predicate.
14530 if (predicate == NULL) {
14531 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
14532 }
14533
14534 // At this point we expect that we're parsing a case-in node. We will
14535 // continue to parse the in nodes until we hit the end of the list.
14536 while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
14537 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
14538 parser->pattern_matching_newlines = true;
14539
14540 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
14541 parser->command_start = false;
14542 parser_lex(parser);
14543
14544 pm_token_t in_keyword = parser->previous;
14545 pm_node_t *pattern = parse_pattern(parser, true, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
14546 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
14547
14548 // Since we're in the top-level of the case-in node we need to check
14549 // for guard clauses in the form of `if` or `unless` statements.
14550 if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
14551 pm_token_t keyword = parser->previous;
14552 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, true, PM_ERR_CONDITIONAL_IF_PREDICATE);
14553 pattern = (pm_node_t *) pm_if_node_modifier_create(parser, pattern, &keyword, predicate);
14554 } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
14555 pm_token_t keyword = parser->previous;
14556 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, true, PM_ERR_CONDITIONAL_UNLESS_PREDICATE);
14557 pattern = (pm_node_t *) pm_unless_node_modifier_create(parser, pattern, &keyword, predicate);
14558 }
14559
14560 // Now we need to check for the terminator of the in node's pattern.
14561 // It can be a newline or semicolon optionally followed by a `then`
14562 // keyword.
14563 pm_token_t then_keyword;
14564 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
14565 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
14566 then_keyword = parser->previous;
14567 } else {
14568 then_keyword = not_provided(parser);
14569 }
14570 } else {
14571 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
14572 then_keyword = parser->previous;
14573 }
14574
14575 // Now we can actually parse the statements associated with the in
14576 // node.
14577 pm_statements_node_t *statements;
14579 statements = NULL;
14580 } else {
14581 statements = parse_statements(parser, PM_CONTEXT_CASE_IN);
14582 }
14583
14584 // Now that we have the full pattern and statements, we can create the
14585 // node and attach it to the case node.
14586 pm_node_t *condition = (pm_node_t *) pm_in_node_create(parser, pattern, statements, &in_keyword, &then_keyword);
14587 pm_case_match_node_condition_append(case_node, condition);
14588 }
14589
14590 // If we didn't parse any conditions (in or when) then we need
14591 // to indicate that we have an error.
14592 if (case_node->conditions.size == 0) {
14593 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
14594 }
14595
14596 node = (pm_node_t *) case_node;
14597 }
14598
14599 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14600 if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) {
14601 pm_token_t else_keyword = parser->previous;
14602 pm_else_node_t *else_node;
14603
14604 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
14605 else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE), &parser->current);
14606 } else {
14607 else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current);
14608 }
14609
14610 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
14611 pm_case_node_consequent_set((pm_case_node_t *) node, else_node);
14612 } else {
14613 pm_case_match_node_consequent_set((pm_case_match_node_t *) node, else_node);
14614 }
14615 }
14616
14617 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM);
14618 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
14619 pm_case_node_end_keyword_loc_set((pm_case_node_t *) node, &parser->previous);
14620 } else {
14621 pm_case_match_node_end_keyword_loc_set((pm_case_match_node_t *) node, &parser->previous);
14622 }
14623
14624 return node;
14625 }
14627 parser_lex(parser);
14628
14629 pm_token_t begin_keyword = parser->previous;
14630 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14631 pm_statements_node_t *begin_statements = NULL;
14632
14634 pm_accepts_block_stack_push(parser, true);
14635 begin_statements = parse_statements(parser, PM_CONTEXT_BEGIN);
14636 pm_accepts_block_stack_pop(parser);
14637 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14638 }
14639
14640 pm_begin_node_t *begin_node = pm_begin_node_create(parser, &begin_keyword, begin_statements);
14641 parse_rescues(parser, begin_node, false);
14642
14643 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM);
14644 begin_node->base.location.end = parser->previous.end;
14645 pm_begin_node_end_keyword_set(begin_node, &parser->previous);
14646
14647 if ((begin_node->else_clause != NULL) && (begin_node->rescue_clause == NULL)) {
14648 pm_parser_err_node(parser, (pm_node_t *) begin_node->else_clause, PM_ERR_BEGIN_LONELY_ELSE);
14649 }
14650
14651 return (pm_node_t *) begin_node;
14652 }
14654 if (binding_power != PM_BINDING_POWER_STATEMENT) {
14655 pm_parser_err_current(parser, PM_ERR_STATEMENT_PREEXE_BEGIN);
14656 }
14657
14658 parser_lex(parser);
14659 pm_token_t keyword = parser->previous;
14660
14661 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_BEGIN_UPCASE_BRACE);
14662 pm_token_t opening = parser->previous;
14663 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_PREEXE);
14664
14665 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM);
14666 pm_context_t context = parser->current_context->context;
14667 if ((context != PM_CONTEXT_MAIN) && (context != PM_CONTEXT_PREEXE)) {
14668 pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL);
14669 }
14670 return (pm_node_t *) pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
14671 }
14675 parser_lex(parser);
14676
14677 pm_token_t keyword = parser->previous;
14678 pm_arguments_t arguments = { 0 };
14679
14680 if (
14681 token_begins_expression_p(parser->current.type) ||
14682 match2(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)
14683 ) {
14684 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
14685
14686 if (binding_power == PM_BINDING_POWER_UNSET || binding_power >= PM_BINDING_POWER_RANGE) {
14687 parse_arguments(parser, &arguments, false, PM_TOKEN_EOF);
14688 }
14689 }
14690
14691 switch (keyword.type) {
14693 return (pm_node_t *) pm_break_node_create(parser, &keyword, arguments.arguments);
14695 return (pm_node_t *) pm_next_node_create(parser, &keyword, arguments.arguments);
14697 if (
14700 ) {
14701 pm_parser_err_current(parser, PM_ERR_RETURN_INVALID);
14702 }
14703 return (pm_node_t *) pm_return_node_create(parser, &keyword, arguments.arguments);
14704 }
14705 default:
14706 assert(false && "unreachable");
14707 return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
14708 }
14709 }
14711 parser_lex(parser);
14712
14713 pm_token_t keyword = parser->previous;
14714 pm_arguments_t arguments = { 0 };
14715 parse_arguments_list(parser, &arguments, true, accepts_command_call);
14716
14717 if (
14718 arguments.opening_loc.start == NULL &&
14719 arguments.arguments == NULL &&
14720 ((arguments.block == NULL) || PM_NODE_TYPE_P(arguments.block, PM_BLOCK_NODE))
14721 ) {
14722 return (pm_node_t *) pm_forwarding_super_node_create(parser, &keyword, &arguments);
14723 }
14724
14725 return (pm_node_t *) pm_super_node_create(parser, &keyword, &arguments);
14726 }
14728 parser_lex(parser);
14729
14730 pm_token_t keyword = parser->previous;
14731 pm_arguments_t arguments = { 0 };
14732 parse_arguments_list(parser, &arguments, false, accepts_command_call);
14733
14734 return (pm_node_t *) pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc);
14735 }
14737 parser_lex(parser);
14738 pm_token_t class_keyword = parser->previous;
14739 pm_do_loop_stack_push(parser, false);
14740
14741 if (accept1(parser, PM_TOKEN_LESS_LESS)) {
14742 pm_token_t operator = parser->previous;
14743 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_NOT, true, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS);
14744
14745 pm_constant_id_t old_param_name = parser->current_param_name;
14746 parser->current_param_name = 0;
14747 pm_parser_scope_push(parser, true);
14748 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14749
14750 pm_node_t *statements = NULL;
14752 pm_accepts_block_stack_push(parser, true);
14753 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_SCLASS);
14754 pm_accepts_block_stack_pop(parser);
14755 }
14756
14757 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
14758 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
14759 statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements, false);
14760 }
14761
14762 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
14763
14764 pm_constant_id_list_t locals = parser->current_scope->locals;
14765 pm_parser_scope_pop(parser);
14766 parser->current_param_name = old_param_name;
14767 pm_do_loop_stack_pop(parser);
14768 return (pm_node_t *) pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous);
14769 }
14770
14771 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_CLASS_NAME);
14772 pm_token_t name = parser->previous;
14773 if (name.type != PM_TOKEN_CONSTANT) {
14774 pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
14775 }
14776
14777 pm_token_t inheritance_operator;
14778 pm_node_t *superclass;
14779
14780 if (match1(parser, PM_TOKEN_LESS)) {
14781 inheritance_operator = parser->current;
14782 lex_state_set(parser, PM_LEX_STATE_BEG);
14783
14784 parser->command_start = true;
14785 parser_lex(parser);
14786
14787 superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CLASS_SUPERCLASS);
14788 } else {
14789 inheritance_operator = not_provided(parser);
14790 superclass = NULL;
14791 }
14792
14793 pm_constant_id_t old_param_name = parser->current_param_name;
14794 parser->current_param_name = 0;
14795 pm_parser_scope_push(parser, true);
14796 if (inheritance_operator.type != PM_TOKEN_NOT_PROVIDED) {
14797 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
14798 } else {
14799 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14800 }
14801 pm_node_t *statements = NULL;
14802
14804 pm_accepts_block_stack_push(parser, true);
14805 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_CLASS);
14806 pm_accepts_block_stack_pop(parser);
14807 }
14808
14809 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
14810 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
14811 statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements, false);
14812 }
14813
14814 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
14815
14816 if (context_def_p(parser)) {
14817 pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
14818 }
14819
14820 pm_constant_id_list_t locals = parser->current_scope->locals;
14821 pm_parser_scope_pop(parser);
14822 parser->current_param_name = old_param_name;
14823 pm_do_loop_stack_pop(parser);
14824
14825 if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
14826 pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
14827 }
14828
14829 return (pm_node_t *) pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, &inheritance_operator, superclass, statements, &parser->previous);
14830 }
14831 case PM_TOKEN_KEYWORD_DEF: {
14832 pm_token_t def_keyword = parser->current;
14833
14834 pm_node_t *receiver = NULL;
14835 pm_token_t operator = not_provided(parser);
14836 pm_token_t name = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = def_keyword.end, .end = def_keyword.end };
14837
14838 // This context is necessary for lexing `...` in bare params correctly.
14839 // It must be pushed before lexing the first param, so it is here.
14840 context_push(parser, PM_CONTEXT_DEF_PARAMS);
14841 parser_lex(parser);
14842 pm_constant_id_t old_param_name = parser->current_param_name;
14843
14844 switch (parser->current.type) {
14845 case PM_CASE_OPERATOR:
14846 pm_parser_scope_push(parser, true);
14847 parser->current_param_name = 0;
14848 lex_state_set(parser, PM_LEX_STATE_ENDFN);
14849 parser_lex(parser);
14850 name = parser->previous;
14851 break;
14852 case PM_TOKEN_IDENTIFIER: {
14853 parser_lex(parser);
14854
14855 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
14856 receiver = parse_variable_call(parser);
14857
14858 pm_parser_scope_push(parser, true);
14859 parser->current_param_name = 0;
14860 lex_state_set(parser, PM_LEX_STATE_FNAME);
14861 parser_lex(parser);
14862
14863 operator = parser->previous;
14864 name = parse_method_definition_name(parser);
14865 } else {
14866 pm_refute_numbered_parameter(parser, parser->previous.start, parser->previous.end);
14867 pm_parser_scope_push(parser, true);
14868 parser->current_param_name = 0;
14869 name = parser->previous;
14870 }
14871
14872 break;
14873 }
14874 case PM_TOKEN_CONSTANT:
14885 pm_parser_scope_push(parser, true);
14886 parser->current_param_name = 0;
14887 parser_lex(parser);
14888 pm_token_t identifier = parser->previous;
14889
14890 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
14891 lex_state_set(parser, PM_LEX_STATE_FNAME);
14892 parser_lex(parser);
14893 operator = parser->previous;
14894
14895 switch (identifier.type) {
14896 case PM_TOKEN_CONSTANT:
14897 receiver = (pm_node_t *) pm_constant_read_node_create(parser, &identifier);
14898 break;
14900 receiver = (pm_node_t *) pm_instance_variable_read_node_create(parser, &identifier);
14901 break;
14903 receiver = (pm_node_t *) pm_class_variable_read_node_create(parser, &identifier);
14904 break;
14906 receiver = (pm_node_t *) pm_global_variable_read_node_create(parser, &identifier);
14907 break;
14909 receiver = (pm_node_t *) pm_nil_node_create(parser, &identifier);
14910 break;
14912 receiver = (pm_node_t *) pm_self_node_create(parser, &identifier);
14913 break;
14915 receiver = (pm_node_t *) pm_true_node_create(parser, &identifier);
14916 break;
14918 receiver = (pm_node_t *)pm_false_node_create(parser, &identifier);
14919 break;
14921 receiver = (pm_node_t *) pm_source_file_node_create(parser, &identifier);
14922 break;
14924 receiver = (pm_node_t *) pm_source_line_node_create(parser, &identifier);
14925 break;
14927 receiver = (pm_node_t *) pm_source_encoding_node_create(parser, &identifier);
14928 break;
14929 default:
14930 break;
14931 }
14932
14933 name = parse_method_definition_name(parser);
14934 } else {
14935 name = identifier;
14936 }
14937 break;
14938 }
14940 // The current context is `PM_CONTEXT_DEF_PARAMS`, however the inner expression
14941 // of this parenthesis should not be processed under this context.
14942 // Thus, the context is popped here.
14943 context_pop(parser);
14944 parser_lex(parser);
14945
14946 pm_token_t lparen = parser->previous;
14947 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, PM_ERR_DEF_RECEIVER);
14948
14949 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
14950 pm_token_t rparen = parser->previous;
14951
14952 lex_state_set(parser, PM_LEX_STATE_FNAME);
14953 expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
14954
14955 operator = parser->previous;
14956 receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, expression, &rparen);
14957
14958 pm_parser_scope_push(parser, true);
14959 parser->current_param_name = 0;
14960
14961 // `PM_CONTEXT_DEF_PARAMS` is pushed again here for the same reason as described above.
14962 context_push(parser, PM_CONTEXT_DEF_PARAMS);
14963 name = parse_method_definition_name(parser);
14964 break;
14965 }
14966 default:
14967 pm_parser_scope_push(parser, true);
14968 parser->current_param_name = 0;
14969 name = parse_method_definition_name(parser);
14970 break;
14971 }
14972
14973 // If, after all that, we were unable to find a method name, add an
14974 // error to the error list.
14975 if (name.type == PM_TOKEN_MISSING) {
14976 pm_parser_err_previous(parser, PM_ERR_DEF_NAME);
14977 }
14978
14979 pm_token_t lparen;
14980 pm_token_t rparen;
14981 pm_parameters_node_t *params;
14982
14983 switch (parser->current.type) {
14985 parser_lex(parser);
14986 lparen = parser->previous;
14987
14988 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14989 params = NULL;
14990 } else {
14991 params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true);
14992 }
14993
14994 lex_state_set(parser, PM_LEX_STATE_BEG);
14995 parser->command_start = true;
14996
14997 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_DEF_PARAMS_TERM_PAREN);
14998 rparen = parser->previous;
14999 break;
15000 }
15001 case PM_CASE_PARAMETER: {
15002 // If we're about to lex a label, we need to add the label
15003 // state to make sure the next newline is ignored.
15004 if (parser->current.type == PM_TOKEN_LABEL) {
15005 lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL);
15006 }
15007
15008 lparen = not_provided(parser);
15009 rparen = not_provided(parser);
15010 params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true);
15011 break;
15012 }
15013 default: {
15014 lparen = not_provided(parser);
15015 rparen = not_provided(parser);
15016 params = NULL;
15017 break;
15018 }
15019 }
15020
15021 uint32_t locals_body_index = (uint32_t) parser->current_scope->locals.size;
15022
15023 context_pop(parser);
15024 pm_node_t *statements = NULL;
15025 pm_token_t equal;
15026 pm_token_t end_keyword;
15027
15028 if (accept1(parser, PM_TOKEN_EQUAL)) {
15029 if (token_is_setter_name(&name)) {
15030 pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
15031 }
15032 equal = parser->previous;
15033
15034 context_push(parser, PM_CONTEXT_DEF);
15035 pm_do_loop_stack_push(parser, false);
15036 statements = (pm_node_t *) pm_statements_node_create(parser);
15037
15038 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, binding_power < PM_BINDING_POWER_COMPOSITION, PM_ERR_DEF_ENDLESS);
15039
15040 if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
15041 pm_token_t rescue_keyword = parser->previous;
15042 pm_node_t *value = parse_expression(parser, binding_power, false, PM_ERR_RESCUE_MODIFIER_VALUE);
15043 pm_rescue_modifier_node_t *rescue_node = pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value);
15044 statement = (pm_node_t *)rescue_node;
15045 }
15046
15047 pm_statements_node_body_append((pm_statements_node_t *) statements, statement);
15048 pm_do_loop_stack_pop(parser);
15049 context_pop(parser);
15050 end_keyword = not_provided(parser);
15051 } else {
15052 equal = not_provided(parser);
15053
15054 if (lparen.type == PM_TOKEN_NOT_PROVIDED) {
15055 lex_state_set(parser, PM_LEX_STATE_BEG);
15056 parser->command_start = true;
15057 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM);
15058 } else {
15059 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15060 }
15061
15062 pm_accepts_block_stack_push(parser, true);
15063 pm_do_loop_stack_push(parser, false);
15064
15066 pm_accepts_block_stack_push(parser, true);
15067 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_DEF);
15068 pm_accepts_block_stack_pop(parser);
15069 }
15070
15071 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
15072 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
15073 statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements, true);
15074 }
15075
15076 pm_accepts_block_stack_pop(parser);
15077 pm_do_loop_stack_pop(parser);
15078 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM);
15079 end_keyword = parser->previous;
15080 }
15081
15082 pm_constant_id_list_t locals = parser->current_scope->locals;
15083 parser->current_param_name = old_param_name;
15084 pm_parser_scope_pop(parser);
15085
15086 return (pm_node_t *) pm_def_node_create(
15087 parser,
15088 &name,
15089 receiver,
15090 params,
15091 statements,
15092 &locals,
15093 locals_body_index,
15094 &def_keyword,
15095 &operator,
15096 &lparen,
15097 &rparen,
15098 &equal,
15099 &end_keyword
15100 );
15101 }
15103 parser_lex(parser);
15104 pm_token_t keyword = parser->previous;
15105
15106 pm_token_t lparen;
15107 pm_token_t rparen;
15108 pm_node_t *expression;
15109
15110 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
15111 lparen = parser->previous;
15112 expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_DEFINED_EXPRESSION);
15113
15114 if (parser->recovering) {
15115 rparen = not_provided(parser);
15116 } else {
15117 accept1(parser, PM_TOKEN_NEWLINE);
15118 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
15119 rparen = parser->previous;
15120 }
15121 } else {
15122 lparen = not_provided(parser);
15123 rparen = not_provided(parser);
15124 expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_DEFINED_EXPRESSION);
15125 }
15126
15127 return (pm_node_t *) pm_defined_node_create(
15128 parser,
15129 &lparen,
15130 expression,
15131 &rparen,
15132 &PM_LOCATION_TOKEN_VALUE(&keyword)
15133 );
15134 }
15136 if (binding_power != PM_BINDING_POWER_STATEMENT) {
15137 pm_parser_err_current(parser, PM_ERR_STATEMENT_POSTEXE_END);
15138 }
15139
15140 parser_lex(parser);
15141 pm_token_t keyword = parser->previous;
15142
15143 if (context_def_p(parser)) {
15144 pm_parser_warn_token(parser, &keyword, PM_WARN_END_IN_METHOD);
15145 }
15146
15147 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_END_UPCASE_BRACE);
15148 pm_token_t opening = parser->previous;
15149 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_POSTEXE);
15150
15151 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM);
15152 return (pm_node_t *) pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
15153 }
15155 parser_lex(parser);
15156 return (pm_node_t *)pm_false_node_create(parser, &parser->previous);
15157 case PM_TOKEN_KEYWORD_FOR: {
15158 parser_lex(parser);
15159 pm_token_t for_keyword = parser->previous;
15160 pm_node_t *index;
15161
15162 context_push(parser, PM_CONTEXT_FOR_INDEX);
15163
15164 // First, parse out the first index expression.
15165 if (accept1(parser, PM_TOKEN_USTAR)) {
15166 pm_token_t star_operator = parser->previous;
15167 pm_node_t *name = NULL;
15168
15169 if (token_begins_expression_p(parser->current.type)) {
15170 name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
15171 }
15172
15173 index = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
15174 } else if (token_begins_expression_p(parser->current.type)) {
15175 index = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA);
15176 } else {
15177 pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX);
15178 index = (pm_node_t *) pm_missing_node_create(parser, for_keyword.start, for_keyword.end);
15179 }
15180
15181 // Now, if there are multiple index expressions, parse them out.
15182 if (match1(parser, PM_TOKEN_COMMA)) {
15183 index = parse_targets(parser, index, PM_BINDING_POWER_INDEX);
15184 } else {
15185 index = parse_target(parser, index);
15186 }
15187
15188 context_pop(parser);
15189 pm_do_loop_stack_push(parser, true);
15190
15191 expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
15192 pm_token_t in_keyword = parser->previous;
15193
15194 pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_FOR_COLLECTION);
15195 pm_do_loop_stack_pop(parser);
15196
15197 pm_token_t do_keyword;
15198 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
15199 do_keyword = parser->previous;
15200 } else {
15201 do_keyword = not_provided(parser);
15202 }
15203
15204 accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
15205 pm_statements_node_t *statements = NULL;
15206
15207 if (!accept1(parser, PM_TOKEN_KEYWORD_END)) {
15208 statements = parse_statements(parser, PM_CONTEXT_FOR);
15209 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM);
15210 }
15211
15212 return (pm_node_t *) pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous);
15213 }
15215 parser_lex(parser);
15216 return parse_conditional(parser, PM_CONTEXT_IF);
15218 if (binding_power != PM_BINDING_POWER_STATEMENT) {
15219 pm_parser_err_current(parser, PM_ERR_STATEMENT_UNDEF);
15220 }
15221
15222 parser_lex(parser);
15223 pm_undef_node_t *undef = pm_undef_node_create(parser, &parser->previous);
15224 pm_node_t *name = parse_undef_argument(parser);
15225
15226 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
15227 pm_node_destroy(parser, name);
15228 } else {
15229 pm_undef_node_append(undef, name);
15230
15231 while (match1(parser, PM_TOKEN_COMMA)) {
15232 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
15233 parser_lex(parser);
15234 name = parse_undef_argument(parser);
15235
15236 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
15237 pm_node_destroy(parser, name);
15238 break;
15239 }
15240
15241 pm_undef_node_append(undef, name);
15242 }
15243 }
15244
15245 return (pm_node_t *) undef;
15246 }
15247 case PM_TOKEN_KEYWORD_NOT: {
15248 parser_lex(parser);
15249
15250 pm_token_t message = parser->previous;
15251 pm_arguments_t arguments = { 0 };
15252 pm_node_t *receiver = NULL;
15253
15254 accept1(parser, PM_TOKEN_NEWLINE);
15255
15256 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
15257 arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15258
15259 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15260 arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15261 } else {
15262 receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_NOT_EXPRESSION);
15263 pm_conditional_predicate(receiver);
15264
15265 if (!parser->recovering) {
15266 accept1(parser, PM_TOKEN_NEWLINE);
15267 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
15268 arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15269 }
15270 }
15271 } else {
15272 receiver = parse_expression(parser, PM_BINDING_POWER_NOT, true, PM_ERR_NOT_EXPRESSION);
15273 pm_conditional_predicate(receiver);
15274 }
15275
15276 return (pm_node_t *) pm_call_node_not_create(parser, receiver, &message, &arguments);
15277 }
15279 parser_lex(parser);
15280 return parse_conditional(parser, PM_CONTEXT_UNLESS);
15282 parser_lex(parser);
15283
15284 pm_token_t module_keyword = parser->previous;
15285 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_MODULE_NAME);
15286 pm_token_t name;
15287
15288 // If we can recover from a syntax error that occurred while parsing
15289 // the name of the module, then we'll handle that here.
15290 if (PM_NODE_TYPE_P(constant_path, PM_MISSING_NODE)) {
15291 pm_token_t missing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
15292 return (pm_node_t *) pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing);
15293 }
15294
15295 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
15296 pm_token_t double_colon = parser->previous;
15297
15298 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
15299 pm_node_t *constant = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
15300
15301 constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, constant);
15302 }
15303
15304 // Here we retrieve the name of the module. If it wasn't a constant,
15305 // then it's possible that `module foo` was passed, which is a
15306 // syntax error. We handle that here as well.
15307 name = parser->previous;
15308 if (name.type != PM_TOKEN_CONSTANT) {
15309 pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
15310 }
15311
15312 pm_constant_id_t old_param_name = parser->current_param_name;
15313 parser->current_param_name = 0;
15314 pm_parser_scope_push(parser, true);
15315 accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
15316 pm_node_t *statements = NULL;
15317
15319 pm_accepts_block_stack_push(parser, true);
15320 statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_MODULE);
15321 pm_accepts_block_stack_pop(parser);
15322 }
15323
15324 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
15325 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
15326 statements = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) statements, false);
15327 }
15328
15329 pm_constant_id_list_t locals = parser->current_scope->locals;
15330 pm_parser_scope_pop(parser);
15331 parser->current_param_name = old_param_name;
15332
15333 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM);
15334
15335 if (context_def_p(parser)) {
15336 pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
15337 }
15338
15339 return (pm_node_t *) pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous);
15340 }
15342 parser_lex(parser);
15343 return (pm_node_t *) pm_nil_node_create(parser, &parser->previous);
15345 parser_lex(parser);
15346 return (pm_node_t *) pm_redo_node_create(parser, &parser->previous);
15348 parser_lex(parser);
15349 return (pm_node_t *) pm_retry_node_create(parser, &parser->previous);
15351 parser_lex(parser);
15352 return (pm_node_t *) pm_self_node_create(parser, &parser->previous);
15354 parser_lex(parser);
15355 return (pm_node_t *) pm_true_node_create(parser, &parser->previous);
15357 pm_do_loop_stack_push(parser, true);
15358 parser_lex(parser);
15359 pm_token_t keyword = parser->previous;
15360
15361 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
15362 pm_do_loop_stack_pop(parser);
15363
15364 expect3(parser, PM_TOKEN_KEYWORD_DO_LOOP, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
15365 pm_statements_node_t *statements = NULL;
15366
15367 if (!accept1(parser, PM_TOKEN_KEYWORD_END)) {
15368 pm_accepts_block_stack_push(parser, true);
15369 statements = parse_statements(parser, PM_CONTEXT_UNTIL);
15370 pm_accepts_block_stack_pop(parser);
15371 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15372 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM);
15373 }
15374
15375 return (pm_node_t *) pm_until_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
15376 }
15378 pm_do_loop_stack_push(parser, true);
15379 parser_lex(parser);
15380 pm_token_t keyword = parser->previous;
15381
15382 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
15383 pm_do_loop_stack_pop(parser);
15384
15385 expect3(parser, PM_TOKEN_KEYWORD_DO_LOOP, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
15386 pm_statements_node_t *statements = NULL;
15387
15388 if (!accept1(parser, PM_TOKEN_KEYWORD_END)) {
15389 pm_accepts_block_stack_push(parser, true);
15390 statements = parse_statements(parser, PM_CONTEXT_WHILE);
15391 pm_accepts_block_stack_pop(parser);
15392 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15393 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM);
15394 }
15395
15396 return (pm_node_t *) pm_while_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
15397 }
15399 parser_lex(parser);
15400 pm_token_t opening = parser->previous;
15401 pm_array_node_t *array = pm_array_node_create(parser, &opening);
15402
15403 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15404 accept1(parser, PM_TOKEN_WORDS_SEP);
15405 if (match1(parser, PM_TOKEN_STRING_END)) break;
15406
15407 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
15408 pm_token_t opening = not_provided(parser);
15409 pm_token_t closing = not_provided(parser);
15410 pm_array_node_elements_append(array, (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing));
15411 }
15412
15413 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
15414 }
15415
15416 pm_token_t closing = parser->current;
15417 if (match1(parser, PM_TOKEN_EOF)) {
15418 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM);
15419 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
15420 } else {
15421 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
15422 }
15423 pm_array_node_close_set(array, &closing);
15424
15425 return (pm_node_t *) array;
15426 }
15428 parser_lex(parser);
15429 pm_token_t opening = parser->previous;
15430 pm_array_node_t *array = pm_array_node_create(parser, &opening);
15431
15432 // This is the current node that we are parsing that will be added to the
15433 // list of elements.
15434 pm_node_t *current = NULL;
15435
15436 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15437 switch (parser->current.type) {
15438 case PM_TOKEN_WORDS_SEP: {
15439 if (current == NULL) {
15440 // If we hit a separator before we have any content, then we don't
15441 // need to do anything.
15442 } else {
15443 // If we hit a separator after we've hit content, then we need to
15444 // append that content to the list and reset the current node.
15445 pm_array_node_elements_append(array, current);
15446 current = NULL;
15447 }
15448
15449 parser_lex(parser);
15450 break;
15451 }
15453 pm_token_t opening = not_provided(parser);
15454 pm_token_t closing = not_provided(parser);
15455
15456 if (current == NULL) {
15457 // If we hit content and the current node is NULL, then this is
15458 // the first string content we've seen. In that case we're going
15459 // to create a new string node and set that to the current.
15460 current = (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing);
15461 parser_lex(parser);
15462 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
15463 // If we hit string content and the current node is an
15464 // interpolated string, then we need to append the string content
15465 // to the list of child nodes.
15466 pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
15467 parser_lex(parser);
15468
15469 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
15470 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
15471 // If we hit string content and the current node is a string node,
15472 // then we need to convert the current node into an interpolated
15473 // string and add the string content to the list of child nodes.
15474 pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing);
15475 parser_lex(parser);
15476
15477 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
15478 pm_interpolated_symbol_node_append(interpolated, current);
15479 pm_interpolated_symbol_node_append(interpolated, string);
15480 current = (pm_node_t *) interpolated;
15481 } else {
15482 assert(false && "unreachable");
15483 }
15484
15485 break;
15486 }
15487 case PM_TOKEN_EMBVAR: {
15488 bool start_location_set = false;
15489 if (current == NULL) {
15490 // If we hit an embedded variable and the current node is NULL,
15491 // then this is the start of a new string. We'll set the current
15492 // node to a new interpolated string.
15493 pm_token_t opening = not_provided(parser);
15494 pm_token_t closing = not_provided(parser);
15495 current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
15496 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
15497 // If we hit an embedded variable and the current node is a string
15498 // node, then we'll convert the current into an interpolated
15499 // string and add the string node to the list of parts.
15500 pm_token_t opening = not_provided(parser);
15501 pm_token_t closing = not_provided(parser);
15502 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
15503
15504 current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current);
15505 pm_interpolated_symbol_node_append(interpolated, current);
15506 interpolated->base.location.start = current->location.start;
15507 start_location_set = true;
15508 current = (pm_node_t *) interpolated;
15509 } else {
15510 // If we hit an embedded variable and the current node is an
15511 // interpolated string, then we'll just add the embedded variable.
15512 }
15513
15514 pm_node_t *part = parse_string_part(parser);
15515 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
15516 if (!start_location_set) {
15517 current->location.start = part->location.start;
15518 }
15519 break;
15520 }
15522 bool start_location_set = false;
15523 if (current == NULL) {
15524 // If we hit an embedded expression and the current node is NULL,
15525 // then this is the start of a new string. We'll set the current
15526 // node to a new interpolated string.
15527 pm_token_t opening = not_provided(parser);
15528 pm_token_t closing = not_provided(parser);
15529 current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
15530 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
15531 // If we hit an embedded expression and the current node is a
15532 // string node, then we'll convert the current into an
15533 // interpolated string and add the string node to the list of
15534 // parts.
15535 pm_token_t opening = not_provided(parser);
15536 pm_token_t closing = not_provided(parser);
15537 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
15538
15539 current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current);
15540 pm_interpolated_symbol_node_append(interpolated, current);
15541 interpolated->base.location.start = current->location.start;
15542 start_location_set = true;
15543 current = (pm_node_t *) interpolated;
15544 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
15545 // If we hit an embedded expression and the current node is an
15546 // interpolated string, then we'll just continue on.
15547 } else {
15548 assert(false && "unreachable");
15549 }
15550
15551 pm_node_t *part = parse_string_part(parser);
15552 pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
15553 if (!start_location_set) {
15554 current->location.start = part->location.start;
15555 }
15556 break;
15557 }
15558 default:
15559 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT);
15560 parser_lex(parser);
15561 break;
15562 }
15563 }
15564
15565 // If we have a current node, then we need to append it to the list.
15566 if (current) {
15567 pm_array_node_elements_append(array, current);
15568 }
15569
15570 pm_token_t closing = parser->current;
15571 if (match1(parser, PM_TOKEN_EOF)) {
15572 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
15573 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
15574 } else {
15575 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
15576 }
15577 pm_array_node_close_set(array, &closing);
15578
15579 return (pm_node_t *) array;
15580 }
15582 parser_lex(parser);
15583 pm_token_t opening = parser->previous;
15584 pm_array_node_t *array = pm_array_node_create(parser, &opening);
15585
15586 // skip all leading whitespaces
15587 accept1(parser, PM_TOKEN_WORDS_SEP);
15588
15589 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15590 accept1(parser, PM_TOKEN_WORDS_SEP);
15591 if (match1(parser, PM_TOKEN_STRING_END)) break;
15592
15593 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
15594 pm_token_t opening = not_provided(parser);
15595 pm_token_t closing = not_provided(parser);
15596
15597 pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
15598 pm_array_node_elements_append(array, string);
15599 }
15600
15601 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
15602 }
15603
15604 pm_token_t closing = parser->current;
15605 if (match1(parser, PM_TOKEN_EOF)) {
15606 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_LOWER_TERM);
15607 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
15608 } else {
15609 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
15610 }
15611
15612 pm_array_node_close_set(array, &closing);
15613 return (pm_node_t *) array;
15614 }
15616 parser_lex(parser);
15617 pm_token_t opening = parser->previous;
15618 pm_array_node_t *array = pm_array_node_create(parser, &opening);
15619
15620 // This is the current node that we are parsing that will be added
15621 // to the list of elements.
15622 pm_node_t *current = NULL;
15623
15624 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15625 switch (parser->current.type) {
15626 case PM_TOKEN_WORDS_SEP: {
15627 // Reset the explicit encoding if we hit a separator
15628 // since each element can have its own encoding.
15629 parser->explicit_encoding = NULL;
15630
15631 if (current == NULL) {
15632 // If we hit a separator before we have any content,
15633 // then we don't need to do anything.
15634 } else {
15635 // If we hit a separator after we've hit content,
15636 // then we need to append that content to the list
15637 // and reset the current node.
15638 pm_array_node_elements_append(array, current);
15639 current = NULL;
15640 }
15641
15642 parser_lex(parser);
15643 break;
15644 }
15646 pm_token_t opening = not_provided(parser);
15647 pm_token_t closing = not_provided(parser);
15648
15649 pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
15650 pm_node_flag_set(string, parse_unescaped_encoding(parser));
15651 parser_lex(parser);
15652
15653 if (current == NULL) {
15654 // If we hit content and the current node is NULL,
15655 // then this is the first string content we've seen.
15656 // In that case we're going to create a new string
15657 // node and set that to the current.
15658 current = string;
15659 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
15660 // If we hit string content and the current node is
15661 // an interpolated string, then we need to append
15662 // the string content to the list of child nodes.
15663 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
15664 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
15665 // If we hit string content and the current node is
15666 // a string node, then we need to convert the
15667 // current node into an interpolated string and add
15668 // the string content to the list of child nodes.
15669 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
15670 pm_interpolated_string_node_append(interpolated, current);
15671 pm_interpolated_string_node_append(interpolated, string);
15672 current = (pm_node_t *) interpolated;
15673 } else {
15674 assert(false && "unreachable");
15675 }
15676
15677 break;
15678 }
15679 case PM_TOKEN_EMBVAR: {
15680 if (current == NULL) {
15681 // If we hit an embedded variable and the current
15682 // node is NULL, then this is the start of a new
15683 // string. We'll set the current node to a new
15684 // interpolated string.
15685 pm_token_t opening = not_provided(parser);
15686 pm_token_t closing = not_provided(parser);
15687 current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
15688 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
15689 // If we hit an embedded variable and the current
15690 // node is a string node, then we'll convert the
15691 // current into an interpolated string and add the
15692 // string node to the list of parts.
15693 pm_token_t opening = not_provided(parser);
15694 pm_token_t closing = not_provided(parser);
15695 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
15696 pm_interpolated_string_node_append(interpolated, current);
15697 current = (pm_node_t *) interpolated;
15698 } else {
15699 // If we hit an embedded variable and the current
15700 // node is an interpolated string, then we'll just
15701 // add the embedded variable.
15702 }
15703
15704 pm_node_t *part = parse_string_part(parser);
15705 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
15706 break;
15707 }
15709 if (current == NULL) {
15710 // If we hit an embedded expression and the current
15711 // node is NULL, then this is the start of a new
15712 // string. We'll set the current node to a new
15713 // interpolated string.
15714 pm_token_t opening = not_provided(parser);
15715 pm_token_t closing = not_provided(parser);
15716 current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
15717 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
15718 // If we hit an embedded expression and the current
15719 // node is a string node, then we'll convert the
15720 // current into an interpolated string and add the
15721 // string node to the list of parts.
15722 pm_token_t opening = not_provided(parser);
15723 pm_token_t closing = not_provided(parser);
15724 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
15725 pm_interpolated_string_node_append(interpolated, current);
15726 current = (pm_node_t *) interpolated;
15727 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
15728 // If we hit an embedded expression and the current
15729 // node is an interpolated string, then we'll just
15730 // continue on.
15731 } else {
15732 assert(false && "unreachable");
15733 }
15734
15735 pm_node_t *part = parse_string_part(parser);
15736 pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
15737 break;
15738 }
15739 default:
15740 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT);
15741 parser_lex(parser);
15742 break;
15743 }
15744 }
15745
15746 // If we have a current node, then we need to append it to the list.
15747 if (current) {
15748 pm_array_node_elements_append(array, current);
15749 }
15750
15751 pm_token_t closing = parser->current;
15752 if (match1(parser, PM_TOKEN_EOF)) {
15753 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
15754 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
15755 } else {
15756 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
15757 }
15758
15759 pm_array_node_close_set(array, &closing);
15760 return (pm_node_t *) array;
15761 }
15762 case PM_TOKEN_REGEXP_BEGIN: {
15763 pm_token_t opening = parser->current;
15764 parser_lex(parser);
15765
15766 if (match1(parser, PM_TOKEN_REGEXP_END)) {
15767 // If we get here, then we have an end immediately after a start. In
15768 // that case we'll create an empty content token and return an
15769 // uninterpolated regular expression.
15770 pm_token_t content = (pm_token_t) {
15772 .start = parser->previous.end,
15773 .end = parser->previous.end
15774 };
15775
15776 parser_lex(parser);
15777 return (pm_node_t *) pm_regular_expression_node_create(parser, &opening, &content, &parser->previous);
15778 }
15779
15781
15782 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
15783 // In this case we've hit string content so we know the regular
15784 // expression at least has something in it. We'll need to check if the
15785 // following token is the end (in which case we can return a plain
15786 // regular expression) or if it's not then it has interpolation.
15787 pm_string_t unescaped = parser->current_string;
15788 pm_token_t content = parser->current;
15789 parser_lex(parser);
15790
15791 // If we hit an end, then we can create a regular expression node
15792 // without interpolation, which can be represented more succinctly and
15793 // more easily compiled.
15794 if (accept1(parser, PM_TOKEN_REGEXP_END)) {
15795 return (pm_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
15796 }
15797
15798 // If we get here, then we have interpolation so we'll need to create
15799 // a regular expression node with interpolation.
15800 node = pm_interpolated_regular_expression_node_create(parser, &opening);
15801
15802 pm_token_t opening = not_provided(parser);
15803 pm_token_t closing = not_provided(parser);
15804 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
15805 pm_interpolated_regular_expression_node_append(node, part);
15806 } else {
15807 // If the first part of the body of the regular expression is not a
15808 // string content, then we have interpolation and we need to create an
15809 // interpolated regular expression node.
15810 node = pm_interpolated_regular_expression_node_create(parser, &opening);
15811 }
15812
15813 // Now that we're here and we have interpolation, we'll parse all of the
15814 // parts into the list.
15815 pm_node_t *part;
15816 while (!match2(parser, PM_TOKEN_REGEXP_END, PM_TOKEN_EOF)) {
15817 if ((part = parse_string_part(parser)) != NULL) {
15818 pm_interpolated_regular_expression_node_append(node, part);
15819 }
15820 }
15821
15822 pm_token_t closing = parser->current;
15823 if (match1(parser, PM_TOKEN_EOF)) {
15824 pm_parser_err_token(parser, &opening, PM_ERR_REGEXP_TERM);
15825 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
15826 } else {
15827 expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM);
15828 }
15829 pm_interpolated_regular_expression_node_closing_set(node, &closing);
15830
15831 return (pm_node_t *) node;
15832 }
15833 case PM_TOKEN_BACKTICK:
15835 parser_lex(parser);
15836 pm_token_t opening = parser->previous;
15837
15838 // When we get here, we don't know if this string is going to have
15839 // interpolation or not, even though it is allowed. Still, we want to be
15840 // able to return a string node without interpolation if we can since
15841 // it'll be faster.
15842 if (match1(parser, PM_TOKEN_STRING_END)) {
15843 // If we get here, then we have an end immediately after a start. In
15844 // that case we'll create an empty content token and return an
15845 // uninterpolated string.
15846 pm_token_t content = (pm_token_t) {
15848 .start = parser->previous.end,
15849 .end = parser->previous.end
15850 };
15851
15852 parser_lex(parser);
15853 return (pm_node_t *) pm_xstring_node_create(parser, &opening, &content, &parser->previous);
15854 }
15855
15857
15858 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
15859 // In this case we've hit string content so we know the string
15860 // at least has something in it. We'll need to check if the
15861 // following token is the end (in which case we can return a
15862 // plain string) or if it's not then it has interpolation.
15863 pm_string_t unescaped = parser->current_string;
15864 pm_token_t content = parser->current;
15865 parser_lex(parser);
15866
15867 if (match1(parser, PM_TOKEN_STRING_END)) {
15868 pm_node_t *node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
15869 pm_node_flag_set(node, parse_unescaped_encoding(parser));
15870 parser_lex(parser);
15871 return node;
15872 }
15873
15874 // If we get here, then we have interpolation so we'll need to
15875 // create a string node with interpolation.
15876 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
15877
15878 pm_token_t opening = not_provided(parser);
15879 pm_token_t closing = not_provided(parser);
15880
15881 pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
15882 pm_node_flag_set(part, parse_unescaped_encoding(parser));
15883
15884 pm_interpolated_xstring_node_append(node, part);
15885 } else {
15886 // If the first part of the body of the string is not a string
15887 // content, then we have interpolation and we need to create an
15888 // interpolated string node.
15889 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
15890 }
15891
15892 pm_node_t *part;
15893 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15894 if ((part = parse_string_part(parser)) != NULL) {
15895 pm_interpolated_xstring_node_append(node, part);
15896 }
15897 }
15898
15899 pm_token_t closing = parser->current;
15900 if (match1(parser, PM_TOKEN_EOF)) {
15901 pm_parser_err_token(parser, &opening, PM_ERR_XSTRING_TERM);
15902 closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
15903 } else {
15904 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM);
15905 }
15906 pm_interpolated_xstring_node_closing_set(node, &closing);
15907
15908 return (pm_node_t *) node;
15909 }
15910 case PM_TOKEN_USTAR: {
15911 parser_lex(parser);
15912
15913 // * operators at the beginning of expressions are only valid in the
15914 // context of a multiple assignment. We enforce that here. We'll
15915 // still lex past it though and create a missing node place.
15916 if (binding_power != PM_BINDING_POWER_STATEMENT) {
15917 return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
15918 }
15919
15920 pm_token_t operator = parser->previous;
15921 pm_node_t *name = NULL;
15922
15923 if (token_begins_expression_p(parser->current.type)) {
15924 name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR);
15925 }
15926
15927 pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &operator, name);
15928
15929 if (match1(parser, PM_TOKEN_COMMA)) {
15930 return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX);
15931 } else {
15932 return parse_target_validate(parser, splat);
15933 }
15934 }
15935 case PM_TOKEN_BANG: {
15936 parser_lex(parser);
15937
15938 pm_token_t operator = parser->previous;
15939 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, PM_ERR_UNARY_RECEIVER_BANG);
15940 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
15941
15942 pm_conditional_predicate(receiver);
15943 return (pm_node_t *) node;
15944 }
15945 case PM_TOKEN_TILDE: {
15946 parser_lex(parser);
15947
15948 pm_token_t operator = parser->previous;
15949 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER_TILDE);
15950 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
15951
15952 return (pm_node_t *) node;
15953 }
15954 case PM_TOKEN_UMINUS: {
15955 parser_lex(parser);
15956
15957 pm_token_t operator = parser->previous;
15958 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER_MINUS);
15959 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
15960
15961 return (pm_node_t *) node;
15962 }
15963 case PM_TOKEN_UMINUS_NUM: {
15964 parser_lex(parser);
15965
15966 pm_token_t operator = parser->previous;
15967 pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER_MINUS);
15968
15969 if (accept1(parser, PM_TOKEN_STAR_STAR)) {
15970 pm_token_t exponent_operator = parser->previous;
15971 pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, false, PM_ERR_EXPECT_ARGUMENT);
15972 node = (pm_node_t *) pm_call_node_binary_create(parser, node, &exponent_operator, exponent);
15973 node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
15974 } else {
15975 switch (PM_NODE_TYPE(node)) {
15976 case PM_INTEGER_NODE:
15977 case PM_FLOAT_NODE:
15978 case PM_RATIONAL_NODE:
15979 case PM_IMAGINARY_NODE:
15980 parse_negative_numeric(node);
15981 break;
15982 default:
15983 node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
15984 break;
15985 }
15986 }
15987
15988 return node;
15989 }
15991 int previous_lambda_enclosure_nesting = parser->lambda_enclosure_nesting;
15993
15994 pm_accepts_block_stack_push(parser, true);
15995 parser_lex(parser);
15996
15997 pm_token_t operator = parser->previous;
15998 pm_parser_scope_push(parser, false);
15999 pm_block_parameters_node_t *block_parameters;
16000
16001 switch (parser->current.type) {
16003 parser->current_scope->explicit_params = true;
16004 pm_token_t opening = parser->current;
16005 parser_lex(parser);
16006
16007 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
16008 block_parameters = pm_block_parameters_node_create(parser, NULL, &opening);
16009 } else {
16010 block_parameters = parse_block_parameters(parser, false, &opening, true);
16011 }
16012
16013 accept1(parser, PM_TOKEN_NEWLINE);
16014 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
16015
16016 pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
16017 break;
16018 }
16019 case PM_CASE_PARAMETER: {
16020 parser->current_scope->explicit_params = true;
16021 pm_accepts_block_stack_push(parser, false);
16022 pm_token_t opening = not_provided(parser);
16023 block_parameters = parse_block_parameters(parser, false, &opening, true);
16024 pm_accepts_block_stack_pop(parser);
16025 break;
16026 }
16027 default: {
16028 block_parameters = NULL;
16029 break;
16030 }
16031 }
16032
16033 uint32_t locals_body_index = 0;
16034
16035 if (block_parameters) {
16036 locals_body_index = (uint32_t) parser->current_scope->locals.size;
16037 }
16038
16039 pm_token_t opening;
16040 pm_node_t *body = NULL;
16041 parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
16042
16043 if (accept1(parser, PM_TOKEN_LAMBDA_BEGIN)) {
16044 opening = parser->previous;
16045
16046 if (!accept1(parser, PM_TOKEN_BRACE_RIGHT)) {
16047 body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_BRACES);
16048 expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE);
16049 }
16050 } else {
16051 expect1(parser, PM_TOKEN_KEYWORD_DO, PM_ERR_LAMBDA_OPEN);
16052 opening = parser->previous;
16053
16055 pm_accepts_block_stack_push(parser, true);
16056 body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_DO_END);
16057 pm_accepts_block_stack_pop(parser);
16058 }
16059
16060 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
16061 assert(body == NULL || PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE));
16062 body = (pm_node_t *) parse_rescues_as_begin(parser, (pm_statements_node_t *) body, false);
16063 }
16064
16065 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END);
16066 }
16067
16068 pm_node_t *parameters = (pm_node_t *) block_parameters;
16069 uint8_t maximum = parser->current_scope->numbered_parameters;
16070
16071 if (parameters == NULL && (maximum > 0)) {
16072 parameters = (pm_node_t *) pm_numbered_parameters_node_create(parser, &(pm_location_t) { .start = operator.start, .end = parser->previous.end }, maximum);
16073 locals_body_index = maximum;
16074 }
16075
16076 pm_constant_id_list_t locals = parser->current_scope->locals;
16077 pm_parser_scope_pop(parser);
16078 pm_accepts_block_stack_pop(parser);
16079 return (pm_node_t *) pm_lambda_node_create(parser, &locals, locals_body_index, &operator, &opening, &parser->previous, parameters, body);
16080 }
16081 case PM_TOKEN_UPLUS: {
16082 parser_lex(parser);
16083
16084 pm_token_t operator = parser->previous;
16085 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER_PLUS);
16086 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
16087
16088 return (pm_node_t *) node;
16089 }
16091 return parse_strings(parser, NULL);
16092 case PM_TOKEN_SYMBOL_BEGIN: {
16093 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
16094 parser_lex(parser);
16095
16096 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END);
16097 }
16098 default:
16099 if (context_recoverable(parser, &parser->current)) {
16100 parser->recovering = true;
16101 }
16102
16103 return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
16104 }
16105}
16106
16107static inline pm_node_t *
16108parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
16109 pm_node_t *value = parse_value_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, diag_id);
16110
16111 // Contradicting binding powers, the right-hand-side value of rthe assignment allows the `rescue` modifier.
16112 if (match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
16113 pm_token_t rescue = parser->current;
16114 parser_lex(parser);
16115 pm_node_t *right = parse_expression(parser, binding_power, false, PM_ERR_RESCUE_MODIFIER_VALUE);
16116
16117 return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
16118 }
16119
16120 return value;
16121}
16122
16123
16124static inline pm_node_t *
16125parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
16126 pm_node_t *value = parse_starred_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, diag_id);
16127
16128 bool is_single_value = true;
16129 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
16130 is_single_value = false;
16131 pm_token_t opening = not_provided(parser);
16132 pm_array_node_t *array = pm_array_node_create(parser, &opening);
16133
16134 pm_array_node_elements_append(array, value);
16135 value = (pm_node_t *) array;
16136
16137 while (accept1(parser, PM_TOKEN_COMMA)) {
16138 pm_node_t *element = parse_starred_expression(parser, binding_power, false, PM_ERR_ARRAY_ELEMENT);
16139 pm_array_node_elements_append(array, element);
16140 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
16141 }
16142 }
16143
16144 // Contradicting binding powers, the right-hand-side value of the assignment allows the `rescue` modifier.
16145 if (is_single_value && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
16146 pm_token_t rescue = parser->current;
16147 parser_lex(parser);
16148 pm_node_t *right = parse_expression(parser, binding_power, false, PM_ERR_RESCUE_MODIFIER_VALUE);
16149
16150 return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
16151 }
16152
16153 return value;
16154}
16155
16163static void
16164parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const pm_token_t *operator) {
16165 if (call_node->arguments != NULL) {
16166 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_ARGUMENTS);
16167 pm_node_destroy(parser, (pm_node_t *) call_node->arguments);
16168 call_node->arguments = NULL;
16169 }
16170
16171 if (call_node->block != NULL) {
16172 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_BLOCK);
16173 pm_node_destroy(parser, (pm_node_t *) call_node->block);
16174 call_node->block = NULL;
16175 }
16176}
16177
16178static bool
16179name_is_identifier(pm_parser_t *parser, const uint8_t *source, size_t length) {
16180 if (length == 0) {
16181 return false;
16182 }
16183
16184 size_t width = char_is_identifier_start(parser, source);
16185 if (!width) {
16186 return false;
16187 }
16188
16189 uint8_t *cursor = ((uint8_t *)source) + width;
16190 while (cursor < source + length && (width = char_is_identifier(parser, cursor))) {
16191 cursor += width;
16192 }
16193
16194 return cursor == source + length;
16195}
16196
16201static pm_node_t *
16202parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call) {
16203 pm_string_list_t named_captures = { 0 };
16204 pm_node_t *result;
16205
16206 if (pm_regexp_named_capture_group_names(pm_string_source(content), pm_string_length(content), &named_captures, parser->encoding_changed, parser->encoding) && (named_captures.length > 0)) {
16207 // Since we should not create a MatchWriteNode when all capture names
16208 // are invalid, creating a MatchWriteNode is delayed here.
16209 pm_match_write_node_t *match = NULL;
16210 pm_constant_id_list_t names = { 0 };
16211
16212 for (size_t index = 0; index < named_captures.length; index++) {
16213 pm_string_t *string = &named_captures.strings[index];
16214
16215 const uint8_t *source = pm_string_source(string);
16216 size_t length = pm_string_length(string);
16217
16218 pm_location_t location;
16219 pm_constant_id_t name;
16220
16221 // If the name of the capture group isn't a valid identifier, we do
16222 // not add it to the local table.
16223 if (!name_is_identifier(parser, source, length)) continue;
16224
16225 if (content->type == PM_STRING_SHARED) {
16226 // If the unescaped string is a slice of the source, then we can
16227 // copy the names directly. The pointers will line up.
16228 location = (pm_location_t) { .start = source, .end = source + length };
16229 name = pm_parser_constant_id_location(parser, location.start, location.end);
16230 pm_refute_numbered_parameter(parser, source, source + length);
16231 } else {
16232 // Otherwise, the name is a slice of the malloc-ed owned string,
16233 // in which case we need to copy it out into a new string.
16234 location = call->receiver->location;
16235
16236 void *memory = malloc(length);
16237 if (memory == NULL) abort();
16238
16239 memcpy(memory, source, length);
16240 // This silences clang analyzer warning about leak of memory pointed by `memory`.
16241 // NOLINTNEXTLINE(clang-analyzer-*)
16242 name = pm_parser_constant_id_owned(parser, (const uint8_t *) memory, length);
16243
16244 if (pm_token_is_numbered_parameter(source, source + length)) {
16245 const pm_location_t *location = &call->receiver->location;
16246 PM_PARSER_ERR_LOCATION_FORMAT(parser, location, PM_ERR_PARAMETER_NUMBERED_RESERVED, location->start);
16247 }
16248 }
16249
16250 if (name != 0) {
16251 // We dont want to create duplicate targets if the capture name
16252 // is duplicated.
16253 if (pm_constant_id_list_includes(&names, name)) continue;
16254 pm_constant_id_list_append(&names, name);
16255
16256 // Here we lazily create the MatchWriteNode since we know we're
16257 // about to add a target.
16258 if (match == NULL) match = pm_match_write_node_create(parser, call);
16259
16260 // First, find the depth of the local that is being assigned.
16261 int depth;
16262 if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
16263 pm_parser_local_add(parser, name);
16264 }
16265
16266 // Next, create the local variable target and add it to the
16267 // list of targets for the match.
16268 pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create_values(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
16269 pm_node_list_append(&match->targets, target);
16270 }
16271 }
16272
16273 if (match != NULL) {
16274 result = (pm_node_t *) match;
16275 } else {
16276 result = (pm_node_t *) call;
16277 }
16278
16279 pm_constant_id_list_free(&names);
16280 } else {
16281 result = (pm_node_t *) call;
16282 }
16283
16284 pm_string_list_free(&named_captures);
16285 return result;
16286}
16287
16288static inline pm_node_t *
16289parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call) {
16290 pm_token_t token = parser->current;
16291
16292 switch (token.type) {
16293 case PM_TOKEN_EQUAL: {
16294 switch (PM_NODE_TYPE(node)) {
16295 case PM_CALL_NODE: {
16296 // If we have no arguments to the call node and we need this
16297 // to be a target then this is either a method call or a
16298 // local variable write. This _must_ happen before the value
16299 // is parsed because it could be referenced in the value.
16300 pm_call_node_t *call_node = (pm_call_node_t *) node;
16301 if (pm_call_node_variable_call_p(call_node)) {
16302 pm_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end);
16303 }
16304 }
16305 /* fallthrough */
16306 case PM_CASE_WRITABLE: {
16307 parser_lex(parser);
16308 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL);
16309 return parse_write(parser, node, &token, value);
16310 }
16311 case PM_SPLAT_NODE: {
16312 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
16313 pm_multi_target_node_targets_append(parser, multi_target, node);
16314
16315 parser_lex(parser);
16316 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL);
16317 return parse_write(parser, (pm_node_t *) multi_target, &token, value);
16318 }
16319 default:
16320 parser_lex(parser);
16321
16322 // In this case we have an = sign, but we don't know what it's for. We
16323 // need to treat it as an error. For now, we'll mark it as an error
16324 // and just skip right past it.
16325 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL);
16326 return node;
16327 }
16328 }
16330 switch (PM_NODE_TYPE(node)) {
16333 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_READONLY);
16334 /* fallthrough */
16336 parser_lex(parser);
16337
16338 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16339 pm_node_t *result = (pm_node_t *) pm_global_variable_and_write_node_create(parser, node, &token, value);
16340
16341 pm_node_destroy(parser, node);
16342 return result;
16343 }
16345 parser_lex(parser);
16346
16347 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16348 pm_node_t *result = (pm_node_t *) pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
16349
16350 pm_node_destroy(parser, node);
16351 return result;
16352 }
16353 case PM_CONSTANT_PATH_NODE: {
16354 parser_lex(parser);
16355
16356 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16357 return (pm_node_t *) pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
16358 }
16359 case PM_CONSTANT_READ_NODE: {
16360 parser_lex(parser);
16361
16362 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16363 pm_node_t *result = (pm_node_t *) pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
16364
16365 pm_node_destroy(parser, node);
16366 return result;
16367 }
16369 parser_lex(parser);
16370
16371 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16372 pm_node_t *result = (pm_node_t *) pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
16373
16374 pm_node_destroy(parser, node);
16375 return result;
16376 }
16379 parser_lex(parser);
16380
16381 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16382 pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth);
16383
16384 pm_node_destroy(parser, node);
16385 return result;
16386 }
16387 case PM_CALL_NODE: {
16388 parser_lex(parser);
16389 pm_call_node_t *cast = (pm_call_node_t *) node;
16390
16391 // If we have a vcall (a method with no arguments and no
16392 // receiver that could have been a local variable) then we
16393 // will transform it into a local variable write.
16394 if (pm_call_node_variable_call_p(cast)) {
16395 pm_location_t *message_loc = &cast->message_loc;
16396 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
16397
16398 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end);
16399 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16400 pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
16401
16402 pm_node_destroy(parser, (pm_node_t *) cast);
16403 return result;
16404 }
16405
16406 // If there is no call operator and the message is "[]" then
16407 // this is an aref expression, and we can transform it into
16408 // an aset expression.
16409 if (pm_call_node_index_p(cast)) {
16410 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16411 return (pm_node_t *) pm_index_and_write_node_create(parser, cast, &token, value);
16412 }
16413
16414 // If this node cannot be writable, then we have an error.
16415 if (pm_call_node_writable_p(cast)) {
16416 parse_write_name(parser, &cast->name);
16417 } else {
16418 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
16419 }
16420
16421 parse_call_operator_write(parser, cast, &token);
16422 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16423 return (pm_node_t *) pm_call_and_write_node_create(parser, cast, &token, value);
16424 }
16425 case PM_MULTI_WRITE_NODE: {
16426 parser_lex(parser);
16427 pm_parser_err_token(parser, &token, PM_ERR_AMPAMPEQ_MULTI_ASSIGN);
16428 return node;
16429 }
16430 default:
16431 parser_lex(parser);
16432
16433 // In this case we have an &&= sign, but we don't know what it's for.
16434 // We need to treat it as an error. For now, we'll mark it as an error
16435 // and just skip right past it.
16436 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
16437 return node;
16438 }
16439 }
16441 switch (PM_NODE_TYPE(node)) {
16444 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_READONLY);
16445 /* fallthrough */
16447 parser_lex(parser);
16448
16449 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16450 pm_node_t *result = (pm_node_t *) pm_global_variable_or_write_node_create(parser, node, &token, value);
16451
16452 pm_node_destroy(parser, node);
16453 return result;
16454 }
16456 parser_lex(parser);
16457
16458 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16459 pm_node_t *result = (pm_node_t *) pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
16460
16461 pm_node_destroy(parser, node);
16462 return result;
16463 }
16464 case PM_CONSTANT_PATH_NODE: {
16465 parser_lex(parser);
16466
16467 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16468 return (pm_node_t *) pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
16469 }
16470 case PM_CONSTANT_READ_NODE: {
16471 parser_lex(parser);
16472
16473 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16474 pm_node_t *result = (pm_node_t *) pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
16475
16476 pm_node_destroy(parser, node);
16477 return result;
16478 }
16480 parser_lex(parser);
16481
16482 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16483 pm_node_t *result = (pm_node_t *) pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
16484
16485 pm_node_destroy(parser, node);
16486 return result;
16487 }
16490 parser_lex(parser);
16491
16492 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16493 pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth);
16494
16495 pm_node_destroy(parser, node);
16496 return result;
16497 }
16498 case PM_CALL_NODE: {
16499 parser_lex(parser);
16500 pm_call_node_t *cast = (pm_call_node_t *) node;
16501
16502 // If we have a vcall (a method with no arguments and no
16503 // receiver that could have been a local variable) then we
16504 // will transform it into a local variable write.
16505 if (pm_call_node_variable_call_p(cast)) {
16506 pm_location_t *message_loc = &cast->message_loc;
16507 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
16508
16509 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end);
16510 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16511 pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
16512
16513 pm_node_destroy(parser, (pm_node_t *) cast);
16514 return result;
16515 }
16516
16517 // If there is no call operator and the message is "[]" then
16518 // this is an aref expression, and we can transform it into
16519 // an aset expression.
16520 if (pm_call_node_index_p(cast)) {
16521 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16522 return (pm_node_t *) pm_index_or_write_node_create(parser, cast, &token, value);
16523 }
16524
16525 // If this node cannot be writable, then we have an error.
16526 if (pm_call_node_writable_p(cast)) {
16527 parse_write_name(parser, &cast->name);
16528 } else {
16529 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
16530 }
16531
16532 parse_call_operator_write(parser, cast, &token);
16533 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16534 return (pm_node_t *) pm_call_or_write_node_create(parser, cast, &token, value);
16535 }
16536 case PM_MULTI_WRITE_NODE: {
16537 parser_lex(parser);
16538 pm_parser_err_token(parser, &token, PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN);
16539 return node;
16540 }
16541 default:
16542 parser_lex(parser);
16543
16544 // In this case we have an ||= sign, but we don't know what it's for.
16545 // We need to treat it as an error. For now, we'll mark it as an error
16546 // and just skip right past it.
16547 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
16548 return node;
16549 }
16550 }
16562 switch (PM_NODE_TYPE(node)) {
16565 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_READONLY);
16566 /* fallthrough */
16568 parser_lex(parser);
16569
16570 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16571 pm_node_t *result = (pm_node_t *) pm_global_variable_operator_write_node_create(parser, node, &token, value);
16572
16573 pm_node_destroy(parser, node);
16574 return result;
16575 }
16577 parser_lex(parser);
16578
16579 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16580 pm_node_t *result = (pm_node_t *) pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
16581
16582 pm_node_destroy(parser, node);
16583 return result;
16584 }
16585 case PM_CONSTANT_PATH_NODE: {
16586 parser_lex(parser);
16587
16588 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16589 return (pm_node_t *) pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
16590 }
16591 case PM_CONSTANT_READ_NODE: {
16592 parser_lex(parser);
16593
16594 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16595 pm_node_t *result = (pm_node_t *) pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
16596
16597 pm_node_destroy(parser, node);
16598 return result;
16599 }
16601 parser_lex(parser);
16602
16603 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16604 pm_node_t *result = (pm_node_t *) pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
16605
16606 pm_node_destroy(parser, node);
16607 return result;
16608 }
16611 parser_lex(parser);
16612
16613 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16614 pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth);
16615
16616 pm_node_destroy(parser, node);
16617 return result;
16618 }
16619 case PM_CALL_NODE: {
16620 parser_lex(parser);
16621 pm_call_node_t *cast = (pm_call_node_t *) node;
16622
16623 // If we have a vcall (a method with no arguments and no
16624 // receiver that could have been a local variable) then we
16625 // will transform it into a local variable write.
16626 if (pm_call_node_variable_call_p(cast)) {
16627 pm_location_t *message_loc = &cast->message_loc;
16628 pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
16629
16630 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end);
16631 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16632 pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
16633
16634 pm_node_destroy(parser, (pm_node_t *) cast);
16635 return result;
16636 }
16637
16638 // If there is no call operator and the message is "[]" then
16639 // this is an aref expression, and we can transform it into
16640 // an aset expression.
16641 if (pm_call_node_index_p(cast)) {
16642 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16643 return (pm_node_t *) pm_index_operator_write_node_create(parser, cast, &token, value);
16644 }
16645
16646 // If this node cannot be writable, then we have an error.
16647 if (pm_call_node_writable_p(cast)) {
16648 parse_write_name(parser, &cast->name);
16649 } else {
16650 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
16651 }
16652
16653 parse_call_operator_write(parser, cast, &token);
16654 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16655 return (pm_node_t *) pm_call_operator_write_node_create(parser, cast, &token, value);
16656 }
16657 case PM_MULTI_WRITE_NODE: {
16658 parser_lex(parser);
16659 pm_parser_err_token(parser, &token, PM_ERR_OPERATOR_MULTI_ASSIGN);
16660 return node;
16661 }
16662 default:
16663 parser_lex(parser);
16664
16665 // In this case we have an operator but we don't know what it's for.
16666 // We need to treat it as an error. For now, we'll mark it as an error
16667 // and just skip right past it.
16668 pm_parser_err_previous(parser, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16669 return node;
16670 }
16671 }
16673 case PM_TOKEN_KEYWORD_AND: {
16674 parser_lex(parser);
16675
16676 pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_AND, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16677 return (pm_node_t *) pm_and_node_create(parser, node, &token, right);
16678 }
16680 case PM_TOKEN_PIPE_PIPE: {
16681 parser_lex(parser);
16682
16683 pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_OR, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16684 return (pm_node_t *) pm_or_node_create(parser, node, &token, right);
16685 }
16686 case PM_TOKEN_EQUAL_TILDE: {
16687 // Note that we _must_ parse the value before adding the local
16688 // variables in order to properly mirror the behavior of Ruby. For
16689 // example,
16690 //
16691 // /(?<foo>bar)/ =~ foo
16692 //
16693 // In this case, `foo` should be a method call and not a local yet.
16694 parser_lex(parser);
16695 pm_node_t *argument = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16696
16697 // By default, we're going to create a call node and then return it.
16698 pm_call_node_t *call = pm_call_node_binary_create(parser, node, &token, argument);
16699 pm_node_t *result = (pm_node_t *) call;
16700
16701 // If the receiver of this =~ is a regular expression node, then we
16702 // need to introduce local variables for it based on its named
16703 // capture groups.
16705 // It's possible to have an interpolated regular expression node
16706 // that only contains strings. This is because it can be split
16707 // up by a heredoc. In this case we need to concat the unescaped
16708 // strings together and then parse them as a regular expression.
16710
16711 bool interpolated = false;
16712 size_t total_length = 0;
16713
16714 for (size_t index = 0; index < parts->size; index++) {
16715 pm_node_t *part = parts->nodes[index];
16716
16717 if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
16718 total_length += pm_string_length(&((pm_string_node_t *) part)->unescaped);
16719 } else {
16720 interpolated = true;
16721 break;
16722 }
16723 }
16724
16725 if (!interpolated && total_length > 0) {
16726 void *memory = malloc(total_length);
16727 if (!memory) abort();
16728
16729 uint8_t *cursor = memory;
16730 for (size_t index = 0; index < parts->size; index++) {
16731 pm_string_t *unescaped = &((pm_string_node_t *) parts->nodes[index])->unescaped;
16732 size_t length = pm_string_length(unescaped);
16733
16734 memcpy(cursor, pm_string_source(unescaped), length);
16735 cursor += length;
16736 }
16737
16738 pm_string_t owned;
16739 pm_string_owned_init(&owned, (uint8_t *) memory, total_length);
16740
16741 result = parse_regular_expression_named_captures(parser, &owned, call);
16742 pm_string_free(&owned);
16743 }
16744 } else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
16745 // If we have a regular expression node, then we can just parse
16746 // the named captures directly off the unescaped string.
16747 const pm_string_t *content = &((pm_regular_expression_node_t *) node)->unescaped;
16748 result = parse_regular_expression_named_captures(parser, content, call);
16749 }
16750
16751 return result;
16752 }
16754 case PM_TOKEN_USTAR:
16756 // The only times this will occur are when we are in an error state,
16757 // but we'll put them in here so that errors can propagate.
16763 case PM_TOKEN_GREATER:
16765 case PM_TOKEN_LESS:
16767 case PM_TOKEN_CARET:
16768 case PM_TOKEN_PIPE:
16769 case PM_TOKEN_AMPERSAND:
16771 case PM_TOKEN_LESS_LESS:
16772 case PM_TOKEN_MINUS:
16773 case PM_TOKEN_PLUS:
16774 case PM_TOKEN_PERCENT:
16775 case PM_TOKEN_SLASH:
16776 case PM_TOKEN_STAR:
16777 case PM_TOKEN_STAR_STAR: {
16778 parser_lex(parser);
16779
16780 pm_node_t *argument = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16781 return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument);
16782 }
16784 case PM_TOKEN_DOT: {
16785 parser_lex(parser);
16786 pm_token_t operator = parser->previous;
16787 pm_arguments_t arguments = { 0 };
16788
16789 // This if statement handles the foo.() syntax.
16790 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
16791 parse_arguments_list(parser, &arguments, true, false);
16792 return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &operator, &arguments);
16793 }
16794
16795 pm_token_t message;
16796
16797 switch (parser->current.type) {
16798 case PM_CASE_OPERATOR:
16799 case PM_CASE_KEYWORD:
16800 case PM_TOKEN_CONSTANT:
16802 case PM_TOKEN_METHOD_NAME: {
16803 parser_lex(parser);
16804 message = parser->previous;
16805 break;
16806 }
16807 default: {
16808 pm_parser_err_current(parser, PM_ERR_DEF_NAME);
16809 message = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
16810 }
16811 }
16812
16813 parse_arguments_list(parser, &arguments, true, accepts_command_call);
16814 pm_call_node_t *call = pm_call_node_call_create(parser, node, &operator, &message, &arguments);
16815
16816 if (
16817 (previous_binding_power == PM_BINDING_POWER_STATEMENT) &&
16818 arguments.arguments == NULL &&
16819 arguments.opening_loc.start == NULL &&
16820 match1(parser, PM_TOKEN_COMMA)
16821 ) {
16822 return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX);
16823 } else {
16824 return (pm_node_t *) call;
16825 }
16826 }
16827 case PM_TOKEN_DOT_DOT:
16828 case PM_TOKEN_DOT_DOT_DOT: {
16829 parser_lex(parser);
16830
16831 pm_node_t *right = NULL;
16832 if (token_begins_expression_p(parser->current.type)) {
16833 right = parse_expression(parser, binding_power, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
16834 }
16835
16836 return (pm_node_t *) pm_range_node_create(parser, node, &token, right);
16837 }
16839 pm_token_t keyword = parser->current;
16840 parser_lex(parser);
16841
16842 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, PM_ERR_CONDITIONAL_IF_PREDICATE);
16843 return (pm_node_t *) pm_if_node_modifier_create(parser, node, &keyword, predicate);
16844 }
16846 pm_token_t keyword = parser->current;
16847 parser_lex(parser);
16848
16849 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, PM_ERR_CONDITIONAL_UNLESS_PREDICATE);
16850 return (pm_node_t *) pm_unless_node_modifier_create(parser, node, &keyword, predicate);
16851 }
16853 parser_lex(parser);
16854 pm_statements_node_t *statements = pm_statements_node_create(parser);
16855 pm_statements_node_body_append(statements, node);
16856
16857 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
16858 return (pm_node_t *) pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
16859 }
16861 parser_lex(parser);
16862 pm_statements_node_t *statements = pm_statements_node_create(parser);
16863 pm_statements_node_body_append(statements, node);
16864
16865 pm_node_t *predicate = parse_value_expression(parser, binding_power, true, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
16866 return (pm_node_t *) pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
16867 }
16869 pm_token_t qmark = parser->current;
16870 parser_lex(parser);
16871 pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_TERNARY_EXPRESSION_TRUE);
16872
16873 if (parser->recovering) {
16874 // If parsing the true expression of this ternary resulted in a syntax
16875 // error that we can recover from, then we're going to put missing nodes
16876 // and tokens into the remaining places. We want to be sure to do this
16877 // before the `expect` function call to make sure it doesn't
16878 // accidentally move past a ':' token that occurs after the syntax
16879 // error.
16880 pm_token_t colon = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
16881 pm_node_t *false_expression = (pm_node_t *) pm_missing_node_create(parser, colon.start, colon.end);
16882
16883 return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
16884 }
16885
16886 accept1(parser, PM_TOKEN_NEWLINE);
16887 expect1(parser, PM_TOKEN_COLON, PM_ERR_TERNARY_COLON);
16888
16889 pm_token_t colon = parser->previous;
16890 pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_TERNARY_EXPRESSION_FALSE);
16891
16892 return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
16893 }
16894 case PM_TOKEN_COLON_COLON: {
16895 parser_lex(parser);
16896 pm_token_t delimiter = parser->previous;
16897
16898 switch (parser->current.type) {
16899 case PM_TOKEN_CONSTANT: {
16900 parser_lex(parser);
16901 pm_node_t *path;
16902
16903 if (
16904 (parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
16905 (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))
16906 ) {
16907 // If we have a constant immediately following a '::' operator, then
16908 // this can either be a constant path or a method call, depending on
16909 // what follows the constant.
16910 //
16911 // If we have parentheses, then this is a method call. That would
16912 // look like Foo::Bar().
16913 pm_token_t message = parser->previous;
16914 pm_arguments_t arguments = { 0 };
16915
16916 parse_arguments_list(parser, &arguments, true, accepts_command_call);
16917 path = (pm_node_t *) pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
16918 } else {
16919 // Otherwise, this is a constant path. That would look like Foo::Bar.
16920 pm_node_t *child = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
16921 path = (pm_node_t *)pm_constant_path_node_create(parser, node, &delimiter, child);
16922 }
16923
16924 // If this is followed by a comma then it is a multiple assignment.
16925 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
16926 return parse_targets_validate(parser, path, PM_BINDING_POWER_INDEX);
16927 }
16928
16929 return path;
16930 }
16931 case PM_CASE_OPERATOR:
16932 case PM_CASE_KEYWORD:
16934 case PM_TOKEN_METHOD_NAME: {
16935 parser_lex(parser);
16936 pm_token_t message = parser->previous;
16937
16938 // If we have an identifier following a '::' operator, then it is for
16939 // sure a method call.
16940 pm_arguments_t arguments = { 0 };
16941 parse_arguments_list(parser, &arguments, true, accepts_command_call);
16942 pm_call_node_t *call = pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
16943
16944 // If this is followed by a comma then it is a multiple assignment.
16945 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
16946 return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX);
16947 }
16948
16949 return (pm_node_t *) call;
16950 }
16952 // If we have a parenthesis following a '::' operator, then it is the
16953 // method call shorthand. That would look like Foo::(bar).
16954 pm_arguments_t arguments = { 0 };
16955 parse_arguments_list(parser, &arguments, true, false);
16956
16957 return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &delimiter, &arguments);
16958 }
16959 default: {
16960 pm_parser_err_token(parser, &delimiter, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16961 pm_node_t *child = (pm_node_t *) pm_missing_node_create(parser, delimiter.start, delimiter.end);
16962 return (pm_node_t *)pm_constant_path_node_create(parser, node, &delimiter, child);
16963 }
16964 }
16965 }
16967 parser_lex(parser);
16968 accept1(parser, PM_TOKEN_NEWLINE);
16969 pm_node_t *value = parse_expression(parser, binding_power, true, PM_ERR_RESCUE_MODIFIER_VALUE);
16970
16971 return (pm_node_t *) pm_rescue_modifier_node_create(parser, node, &token, value);
16972 }
16973 case PM_TOKEN_BRACKET_LEFT: {
16974 parser_lex(parser);
16975
16976 pm_arguments_t arguments = { 0 };
16977 arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
16978
16979 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16980 pm_accepts_block_stack_push(parser, true);
16981 parse_arguments(parser, &arguments, false, PM_TOKEN_BRACKET_RIGHT);
16982 pm_accepts_block_stack_pop(parser);
16983 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_EXPECT_RBRACKET);
16984 }
16985
16986 arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
16987
16988 // If we have a comma after the closing bracket then this is a multiple
16989 // assignment and we should parse the targets.
16990 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
16991 pm_call_node_t *aref = pm_call_node_aref_create(parser, node, &arguments);
16992 return parse_targets_validate(parser, (pm_node_t *) aref, PM_BINDING_POWER_INDEX);
16993 }
16994
16995 // If we're at the end of the arguments, we can now check if there is a
16996 // block node that starts with a {. If there is, then we can parse it and
16997 // add it to the arguments.
16998 pm_block_node_t *block = NULL;
16999 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
17000 block = parse_block(parser);
17001 pm_arguments_validate_block(parser, &arguments, block);
17002 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
17003 block = parse_block(parser);
17004 }
17005
17006 if (block != NULL) {
17007 if (arguments.block != NULL) {
17008 pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_AFTER_BLOCK);
17009 if (arguments.arguments == NULL) {
17010 arguments.arguments = pm_arguments_node_create(parser);
17011 }
17012 pm_arguments_node_arguments_append(arguments.arguments, arguments.block);
17013 }
17014
17015 arguments.block = (pm_node_t *) block;
17016 }
17017
17018 return (pm_node_t *) pm_call_node_aref_create(parser, node, &arguments);
17019 }
17020 case PM_TOKEN_KEYWORD_IN: {
17021 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
17022 parser->pattern_matching_newlines = true;
17023
17024 pm_token_t operator = parser->current;
17025 parser->command_start = false;
17026 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
17027
17028 parser_lex(parser);
17029
17030 pm_node_t *pattern = parse_pattern(parser, true, PM_ERR_PATTERN_EXPRESSION_AFTER_IN);
17031 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17032
17033 return (pm_node_t *) pm_match_predicate_node_create(parser, node, pattern, &operator);
17034 }
17036 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
17037 parser->pattern_matching_newlines = true;
17038
17039 pm_token_t operator = parser->current;
17040 parser->command_start = false;
17041 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
17042
17043 parser_lex(parser);
17044
17045 pm_node_t *pattern = parse_pattern(parser, true, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET);
17046 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17047
17048 return (pm_node_t *) pm_match_required_node_create(parser, node, pattern, &operator);
17049 }
17050 default:
17051 assert(false && "unreachable");
17052 return NULL;
17053 }
17054}
17055
17064static pm_node_t *
17065parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id) {
17066 pm_token_t recovery = parser->previous;
17067 pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call);
17068
17069 switch (PM_NODE_TYPE(node)) {
17070 case PM_MISSING_NODE:
17071 // If we found a syntax error, then the type of node returned by
17072 // parse_expression_prefix is going to be a missing node. In that
17073 // case we need to add the error message to the parser's error list.
17074 pm_parser_err(parser, recovery.end, recovery.end, diag_id);
17075 return node;
17080 case PM_UNDEF_NODE:
17081 // These expressions are statements, and cannot be followed by
17082 // operators (except modifiers).
17083 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER_RESCUE) {
17084 return node;
17085 }
17086 break;
17087 case PM_RANGE_NODE:
17088 // Range operators are non-associative, so that it does not
17089 // associate with other range operators (i.e. `..1..` should be
17090 // rejected.) For this reason, we check such a case for unary ranges
17091 // here, and if so, it returns the node immediately,
17092 if ((((pm_range_node_t *) node)->left == NULL) && pm_binding_powers[parser->current.type].left >= PM_BINDING_POWER_RANGE) {
17093 return node;
17094 }
17095 break;
17096 default:
17097 break;
17098 }
17099
17100 // Otherwise we'll look and see if the next token can be parsed as an infix
17101 // operator. If it can, then we'll parse it using parse_expression_infix.
17102 pm_binding_powers_t current_binding_powers;
17103 while (
17104 current_binding_powers = pm_binding_powers[parser->current.type],
17105 binding_power <= current_binding_powers.left &&
17106 current_binding_powers.binary
17107 ) {
17108 node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, accepts_command_call);
17109 if (current_binding_powers.nonassoc) {
17110 bool endless_range_p = PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL;
17111 pm_binding_power_t left = endless_range_p ? PM_BINDING_POWER_TERM : current_binding_powers.left;
17112 if (
17113 left <= pm_binding_powers[parser->current.type].left ||
17114 // Exceptionally to operator precedences, '1.. & 2' is rejected.
17115 // '1.. || 2' is also an exception, but it is handled by the lexer.
17116 // (Here, parser->current is PM_TOKEN_PIPE, not PM_TOKEN_PIPE_PIPE).
17117 (endless_range_p && match1(parser, PM_TOKEN_AMPERSAND))
17118 ) {
17119 break;
17120 }
17121 }
17122 if (accepts_command_call) {
17123 // A command-style method call is only accepted on method chains.
17124 // Thus, we check whether the parsed node can continue method chains.
17125 // The method chain can continue if the parsed node is one of the following five kinds:
17126 // (1) index access: foo[1]
17127 // (2) attribute access: foo.bar
17128 // (3) method call with parenthesis: foo.bar(1)
17129 // (4) method call with a block: foo.bar do end
17130 // (5) constant path: foo::Bar
17131 switch (node->type) {
17132 case PM_CALL_NODE: {
17133 pm_call_node_t *cast = (pm_call_node_t *)node;
17134 if (
17135 // (1) foo[1]
17136 !(
17137 cast->call_operator_loc.start == NULL &&
17138 cast->message_loc.start != NULL &&
17139 cast->message_loc.start[0] == '[' &&
17140 cast->message_loc.end[-1] == ']'
17141 ) &&
17142 // (2) foo.bar
17143 !(
17144 cast->call_operator_loc.start != NULL &&
17145 cast->arguments == NULL &&
17146 cast->block == NULL &&
17147 cast->opening_loc.start == NULL
17148 ) &&
17149 // (3) foo.bar(1)
17150 !(
17151 cast->call_operator_loc.start != NULL &&
17152 cast->opening_loc.start != NULL
17153 ) &&
17154 // (4) foo.bar do end
17155 !(
17156 cast->block != NULL && PM_NODE_TYPE_P(cast->block, PM_BLOCK_NODE)
17157 )
17158 ) {
17159 accepts_command_call = false;
17160 }
17161 break;
17162 }
17163 // (5) foo::Bar
17165 break;
17166 default:
17167 accepts_command_call = false;
17168 break;
17169 }
17170 }
17171 }
17172
17173 return node;
17174}
17175
17176static pm_node_t *
17177parse_program(pm_parser_t *parser) {
17178 pm_parser_scope_push(parser, !parser->current_scope);
17179 parser_lex(parser);
17180
17181 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN);
17182 if (!statements) {
17183 statements = pm_statements_node_create(parser);
17184 }
17185 pm_constant_id_list_t locals = parser->current_scope->locals;
17186 pm_parser_scope_pop(parser);
17187
17188 // If this is an empty file, then we're still going to parse all of the
17189 // statements in order to gather up all of the comments and such. Here we'll
17190 // correct the location information.
17191 if (pm_statements_node_body_length(statements) == 0) {
17192 pm_statements_node_location_set(statements, parser->start, parser->start);
17193 }
17194
17195 return (pm_node_t *) pm_program_node_create(parser, &locals, statements);
17196}
17197
17198/******************************************************************************/
17199/* External functions */
17200/******************************************************************************/
17201
17206pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options) {
17207 assert(source != NULL);
17208
17209 *parser = (pm_parser_t) {
17210 .lex_state = PM_LEX_STATE_BEG,
17211 .enclosure_nesting = 0,
17212 .lambda_enclosure_nesting = -1,
17213 .brace_nesting = 0,
17214 .do_loop_stack = 0,
17215 .accepts_block_stack = 0,
17216 .lex_modes = {
17217 .index = 0,
17218 .stack = {{ .mode = PM_LEX_DEFAULT }},
17219 .current = &parser->lex_modes.stack[0],
17220 },
17221 .start = source,
17222 .end = source + size,
17223 .previous = { .type = PM_TOKEN_EOF, .start = source, .end = source },
17224 .current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
17225 .next_start = NULL,
17226 .heredoc_end = NULL,
17227 .comment_list = { 0 },
17228 .magic_comment_list = { 0 },
17229 .warning_list = { 0 },
17230 .error_list = { 0 },
17231 .current_scope = NULL,
17232 .current_context = NULL,
17233 .encoding = PM_ENCODING_UTF_8_ENTRY,
17234 .encoding_changed_callback = NULL,
17235 .encoding_comment_start = source,
17236 .lex_callback = NULL,
17237 .filepath_string = { 0 },
17238 .constant_pool = { 0 },
17239 .newline_list = { 0 },
17240 .integer_base = 0,
17241 .current_string = PM_STRING_EMPTY,
17242 .start_line = 1,
17243 .explicit_encoding = NULL,
17244 .command_start = true,
17245 .recovering = false,
17246 .encoding_changed = false,
17247 .pattern_matching_newlines = false,
17248 .in_keyword_arg = false,
17249 .current_param_name = 0,
17250 .semantic_token_seen = false,
17251 .frozen_string_literal = false,
17252 .suppress_warnings = false
17253 };
17254
17255 // Initialize the constant pool. We're going to completely guess as to the
17256 // number of constants that we'll need based on the size of the input. The
17257 // ratio we chose here is actually less arbitrary than you might think.
17258 //
17259 // We took ~50K Ruby files and measured the size of the file versus the
17260 // number of constants that were found in those files. Then we found the
17261 // average and standard deviation of the ratios of constants/bytesize. Then
17262 // we added 1.34 standard deviations to the average to get a ratio that
17263 // would fit 75% of the files (for a two-tailed distribution). This works
17264 // because there was about a 0.77 correlation and the distribution was
17265 // roughly normal.
17266 //
17267 // This ratio will need to change if we add more constants to the constant
17268 // pool for another node type.
17269 uint32_t constant_size = ((uint32_t) size) / 95;
17270 pm_constant_pool_init(&parser->constant_pool, constant_size < 4 ? 4 : constant_size);
17271
17272 // Initialize the newline list. Similar to the constant pool, we're going to
17273 // guess at the number of newlines that we'll need based on the size of the
17274 // input.
17275 size_t newline_size = size / 22;
17276 pm_newline_list_init(&parser->newline_list, source, newline_size < 4 ? 4 : newline_size);
17277
17278 // If options were provided to this parse, establish them here.
17279 if (options != NULL) {
17280 // filepath option
17281 parser->filepath_string = options->filepath;
17282
17283 // line option
17284 parser->start_line = options->line;
17285
17286 // encoding option
17287 size_t encoding_length = pm_string_length(&options->encoding);
17288 if (encoding_length > 0) {
17289 const uint8_t *encoding_source = pm_string_source(&options->encoding);
17290 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
17291 }
17292
17293 // frozen_string_literal option
17294 if (options->frozen_string_literal) {
17295 parser->frozen_string_literal = true;
17296 }
17297
17298 // suppress_warnings option
17299 if (options->suppress_warnings) {
17300 parser->suppress_warnings = true;
17301 }
17302
17303 // scopes option
17304 for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
17305 const pm_options_scope_t *scope = pm_options_scope_get(options, scope_index);
17306 pm_parser_scope_push(parser, scope_index == 0);
17307
17308 for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
17309 const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
17310
17311 const uint8_t *source = pm_string_source(local);
17312 size_t length = pm_string_length(local);
17313
17314 uint8_t *allocated = malloc(length);
17315 if (allocated == NULL) continue;
17316
17317 memcpy((void *) allocated, source, length);
17318 pm_parser_local_add_owned(parser, allocated, length);
17319 }
17320 }
17321 }
17322
17323 pm_accepts_block_stack_push(parser, true);
17324
17325 // Skip past the UTF-8 BOM if it exists.
17326 if (size >= 3 && source[0] == 0xef && source[1] == 0xbb && source[2] == 0xbf) {
17327 parser->current.end += 3;
17328 parser->encoding_comment_start += 3;
17329 }
17330
17331 // If the first two bytes of the source are a shebang, then we'll indicate
17332 // that the encoding comment is at the end of the shebang.
17333 if (peek(parser) == '#' && peek_offset(parser, 1) == '!') {
17334 const uint8_t *encoding_comment_start = next_newline(source, (ptrdiff_t) size);
17335 if (encoding_comment_start) {
17336 parser->encoding_comment_start = encoding_comment_start + 1;
17337 }
17338 }
17339}
17340
17346pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback) {
17347 parser->encoding_changed_callback = callback;
17348}
17349
17353static inline void
17354pm_comment_list_free(pm_list_t *list) {
17355 pm_list_node_t *node, *next;
17356
17357 for (node = list->head; node != NULL; node = next) {
17358 next = node->next;
17359
17360 pm_comment_t *comment = (pm_comment_t *) node;
17361 free(comment);
17362 }
17363}
17364
17368static inline void
17369pm_magic_comment_list_free(pm_list_t *list) {
17370 pm_list_node_t *node, *next;
17371
17372 for (node = list->head; node != NULL; node = next) {
17373 next = node->next;
17374
17376 free(magic_comment);
17377 }
17378}
17379
17384pm_parser_free(pm_parser_t *parser) {
17385 pm_string_free(&parser->filepath_string);
17386 pm_diagnostic_list_free(&parser->error_list);
17387 pm_diagnostic_list_free(&parser->warning_list);
17388 pm_comment_list_free(&parser->comment_list);
17389 pm_magic_comment_list_free(&parser->magic_comment_list);
17390 pm_constant_pool_free(&parser->constant_pool);
17391 pm_newline_list_free(&parser->newline_list);
17392
17393 while (parser->current_scope != NULL) {
17394 // Normally, popping the scope doesn't free the locals since it is
17395 // assumed that ownership has transferred to the AST. However if we have
17396 // scopes while we're freeing the parser, it's likely they came from
17397 // eval scopes and we need to free them explicitly here.
17398 pm_constant_id_list_free(&parser->current_scope->locals);
17399 pm_parser_scope_pop(parser);
17400 }
17401
17402 while (parser->lex_modes.index >= PM_LEX_STACK_SIZE) {
17403 lex_mode_pop(parser);
17404 }
17405}
17406
17411pm_parse(pm_parser_t *parser) {
17412 return parse_program(parser);
17413}
17414
17415static inline void
17416pm_serialize_header(pm_buffer_t *buffer) {
17417 pm_buffer_append_string(buffer, "PRISM", 5);
17418 pm_buffer_append_byte(buffer, PRISM_VERSION_MAJOR);
17419 pm_buffer_append_byte(buffer, PRISM_VERSION_MINOR);
17420 pm_buffer_append_byte(buffer, PRISM_VERSION_PATCH);
17421 pm_buffer_append_byte(buffer, PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0);
17422}
17423
17428pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
17429 pm_serialize_header(buffer);
17430 pm_serialize_content(parser, node, buffer);
17431 pm_buffer_append_byte(buffer, '\0');
17432}
17433
17439pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
17440 pm_options_t options = { 0 };
17441 pm_options_read(&options, data);
17442
17443 pm_parser_t parser;
17444 pm_parser_init(&parser, source, size, &options);
17445
17446 pm_node_t *node = pm_parse(&parser);
17447
17448 pm_serialize_header(buffer);
17449 pm_serialize_content(&parser, node, buffer);
17450 pm_buffer_append_byte(buffer, '\0');
17451
17452 pm_node_destroy(&parser, node);
17453 pm_parser_free(&parser);
17454 pm_options_free(&options);
17455}
17456
17461pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
17462 pm_options_t options = { 0 };
17463 pm_options_read(&options, data);
17464
17465 pm_parser_t parser;
17466 pm_parser_init(&parser, source, size, &options);
17467
17468 pm_node_t *node = pm_parse(&parser);
17469 pm_serialize_header(buffer);
17470 pm_serialize_encoding(parser.encoding, buffer);
17471 pm_buffer_append_varsint(buffer, parser.start_line);
17472 pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
17473
17474 pm_node_destroy(&parser, node);
17475 pm_parser_free(&parser);
17476 pm_options_free(&options);
17477}
17478
17479#undef PM_CASE_KEYWORD
17480#undef PM_CASE_OPERATOR
17481#undef PM_CASE_WRITABLE
17482#undef PM_STRING_EMPTY
17483#undef PM_LOCATION_NODE_BASE_VALUE
17484#undef PM_LOCATION_NODE_VALUE
17485#undef PM_LOCATION_NULL_VALUE
17486#undef PM_LOCATION_TOKEN_VALUE
struct pm_block_parameter_node pm_block_parameter_node_t
BlockParameterNode.
struct pm_call_target_node pm_call_target_node_t
CallTargetNode.
struct pm_else_node pm_else_node_t
ElseNode.
struct pm_assoc_node pm_assoc_node_t
AssocNode.
struct pm_undef_node pm_undef_node_t
UndefNode.
struct pm_class_variable_and_write_node pm_class_variable_and_write_node_t
ClassVariableAndWriteNode.
struct pm_index_and_write_node pm_index_and_write_node_t
IndexAndWriteNode.
struct pm_index_target_node pm_index_target_node_t
IndexTargetNode.
struct pm_local_variable_target_node pm_local_variable_target_node_t
LocalVariableTargetNode.
struct pm_constant_path_or_write_node pm_constant_path_or_write_node_t
ConstantPathOrWriteNode.
struct pm_missing_node pm_missing_node_t
MissingNode.
struct pm_embedded_statements_node pm_embedded_statements_node_t
EmbeddedStatementsNode.
struct pm_block_node pm_block_node_t
BlockNode.
struct pm_hash_pattern_node pm_hash_pattern_node_t
HashPatternNode.
struct pm_optional_parameter_node pm_optional_parameter_node_t
OptionalParameterNode.
struct pm_x_string_node pm_x_string_node_t
XStringNode.
struct pm_forwarding_super_node pm_forwarding_super_node_t
ForwardingSuperNode.
struct pm_self_node pm_self_node_t
SelfNode.
struct pm_numbered_reference_read_node pm_numbered_reference_read_node_t
NumberedReferenceReadNode.
struct pm_embedded_variable_node pm_embedded_variable_node_t
EmbeddedVariableNode.
struct pm_class_variable_write_node pm_class_variable_write_node_t
ClassVariableWriteNode.
struct pm_interpolated_string_node pm_interpolated_string_node_t
InterpolatedStringNode.
struct pm_class_variable_or_write_node pm_class_variable_or_write_node_t
ClassVariableOrWriteNode.
struct pm_optional_keyword_parameter_node pm_optional_keyword_parameter_node_t
OptionalKeywordParameterNode.
struct pm_call_or_write_node pm_call_or_write_node_t
CallOrWriteNode.
struct pm_call_node pm_call_node_t
CallNode.
struct pm_class_variable_read_node pm_class_variable_read_node_t
ClassVariableReadNode.
struct pm_match_required_node pm_match_required_node_t
MatchRequiredNode.
struct pm_constant_and_write_node pm_constant_and_write_node_t
ConstantAndWriteNode.
struct pm_constant_path_operator_write_node pm_constant_path_operator_write_node_t
ConstantPathOperatorWriteNode.
struct pm_implicit_rest_node pm_implicit_rest_node_t
ImplicitRestNode.
@ PM_RANGE_FLAGS_EXCLUDE_END
... operator
Definition ast.h:4535
struct pm_local_variable_or_write_node pm_local_variable_or_write_node_t
LocalVariableOrWriteNode.
struct pm_local_variable_read_node pm_local_variable_read_node_t
LocalVariableReadNode.
struct pm_global_variable_and_write_node pm_global_variable_and_write_node_t
GlobalVariableAndWriteNode.
struct pm_arguments_node pm_arguments_node_t
ArgumentsNode.
@ PM_DEFINED_NODE
DefinedNode.
Definition ast.h:707
@ PM_PRE_EXECUTION_NODE
PreExecutionNode.
Definition ast.h:923
@ PM_RETRY_NODE
RetryNode.
Definition ast.h:956
@ PM_REDO_NODE
RedoNode.
Definition ast.h:935
@ PM_CONSTANT_PATH_WRITE_NODE
ConstantPathWriteNode.
Definition ast.h:692
@ PM_INDEX_AND_WRITE_NODE
IndexAndWriteNode.
Definition ast.h:785
@ PM_SOURCE_LINE_NODE
SourceLineNode.
Definition ast.h:974
@ PM_UNLESS_NODE
UnlessNode.
Definition ast.h:998
@ PM_EMBEDDED_VARIABLE_NODE
EmbeddedVariableNode.
Definition ast.h:716
@ PM_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE
GlobalVariableOperatorWriteNode.
Definition ast.h:749
@ PM_CALL_NODE
CallNode.
Definition ast.h:626
@ PM_NIL_NODE
NilNode.
Definition ast.h:887
@ PM_GLOBAL_VARIABLE_READ_NODE
GlobalVariableReadNode.
Definition ast.h:755
@ PM_RATIONAL_NODE
RationalNode.
Definition ast.h:932
@ PM_YIELD_NODE
YieldNode.
Definition ast.h:1013
@ PM_LOCAL_VARIABLE_AND_WRITE_NODE
LocalVariableAndWriteNode.
Definition ast.h:842
@ PM_CONSTANT_AND_WRITE_NODE
ConstantAndWriteNode.
Definition ast.h:668
@ PM_CLASS_NODE
ClassNode.
Definition ast.h:647
@ PM_FIND_PATTERN_NODE
FindPatternNode.
Definition ast.h:725
@ PM_CALL_OPERATOR_WRITE_NODE
CallOperatorWriteNode.
Definition ast.h:629
@ PM_MATCH_WRITE_NODE
MatchWriteNode.
Definition ast.h:869
@ PM_ARRAY_NODE
ArrayNode.
Definition ast.h:587
@ PM_CONSTANT_PATH_TARGET_NODE
ConstantPathTargetNode.
Definition ast.h:689
@ PM_PROGRAM_NODE
ProgramNode.
Definition ast.h:926
@ PM_OR_NODE
OrNode.
Definition ast.h:905
@ PM_MULTI_WRITE_NODE
MultiWriteNode.
Definition ast.h:881
@ PM_IF_NODE
IfNode.
Definition ast.h:770
@ PM_IMPLICIT_NODE
ImplicitNode.
Definition ast.h:776
@ PM_ARGUMENTS_NODE
ArgumentsNode.
Definition ast.h:584
@ PM_FORWARDING_SUPER_NODE
ForwardingSuperNode.
Definition ast.h:743
@ PM_WHILE_NODE
WhileNode.
Definition ast.h:1007
@ PM_INTERPOLATED_STRING_NODE
InterpolatedStringNode.
Definition ast.h:824
@ PM_FALSE_NODE
FalseNode.
Definition ast.h:722
@ PM_FORWARDING_PARAMETER_NODE
ForwardingParameterNode.
Definition ast.h:740
@ PM_BLOCK_LOCAL_VARIABLE_NODE
BlockLocalVariableNode.
Definition ast.h:608
@ PM_HASH_NODE
HashNode.
Definition ast.h:764
@ PM_UNTIL_NODE
UntilNode.
Definition ast.h:1001
@ PM_MATCH_PREDICATE_NODE
MatchPredicateNode.
Definition ast.h:863
@ PM_X_STRING_NODE
XStringNode.
Definition ast.h:1010
@ PM_LOCAL_VARIABLE_OPERATOR_WRITE_NODE
LocalVariableOperatorWriteNode.
Definition ast.h:845
@ PM_LOCAL_VARIABLE_OR_WRITE_NODE
LocalVariableOrWriteNode.
Definition ast.h:848
@ PM_INSTANCE_VARIABLE_AND_WRITE_NODE
InstanceVariableAndWriteNode.
Definition ast.h:797
@ PM_GLOBAL_VARIABLE_TARGET_NODE
GlobalVariableTargetNode.
Definition ast.h:758
@ PM_AND_NODE
AndNode.
Definition ast.h:581
@ PM_CONSTANT_TARGET_NODE
ConstantTargetNode.
Definition ast.h:698
@ PM_CONSTANT_PATH_AND_WRITE_NODE
ConstantPathAndWriteNode.
Definition ast.h:677
@ PM_IN_NODE
InNode.
Definition ast.h:782
@ PM_BLOCK_PARAMETER_NODE
BlockParameterNode.
Definition ast.h:614
@ PM_CAPTURE_PATTERN_NODE
CapturePatternNode.
Definition ast.h:638
@ PM_SOURCE_FILE_NODE
SourceFileNode.
Definition ast.h:971
@ PM_NO_KEYWORDS_PARAMETER_NODE
NoKeywordsParameterNode.
Definition ast.h:890
@ PM_CONSTANT_PATH_OPERATOR_WRITE_NODE
ConstantPathOperatorWriteNode.
Definition ast.h:683
@ PM_MULTI_TARGET_NODE
MultiTargetNode.
Definition ast.h:878
@ PM_SPLAT_NODE
SplatNode.
Definition ast.h:977
@ PM_LAMBDA_NODE
LambdaNode.
Definition ast.h:839
@ PM_CLASS_VARIABLE_READ_NODE
ClassVariableReadNode.
Definition ast.h:659
@ PM_REQUIRED_KEYWORD_PARAMETER_NODE
RequiredKeywordParameterNode.
Definition ast.h:941
@ PM_CALL_TARGET_NODE
CallTargetNode.
Definition ast.h:635
@ PM_ELSE_NODE
ElseNode.
Definition ast.h:710
@ PM_INTERPOLATED_MATCH_LAST_LINE_NODE
InterpolatedMatchLastLineNode.
Definition ast.h:818
@ PM_WHEN_NODE
WhenNode.
Definition ast.h:1004
@ PM_NUMBERED_PARAMETERS_NODE
NumberedParametersNode.
Definition ast.h:893
@ PM_SYMBOL_NODE
SymbolNode.
Definition ast.h:989
@ PM_RESCUE_MODIFIER_NODE
RescueModifierNode.
Definition ast.h:947
@ PM_ALIAS_METHOD_NODE
AliasMethodNode.
Definition ast.h:575
@ PM_MATCH_REQUIRED_NODE
MatchRequiredNode.
Definition ast.h:866
@ PM_FORWARDING_ARGUMENTS_NODE
ForwardingArgumentsNode.
Definition ast.h:737
@ PM_BACK_REFERENCE_READ_NODE
BackReferenceReadNode.
Definition ast.h:599
@ PM_BLOCK_ARGUMENT_NODE
BlockArgumentNode.
Definition ast.h:605
@ PM_MISSING_NODE
MissingNode.
Definition ast.h:872
@ PM_SELF_NODE
SelfNode.
Definition ast.h:962
@ PM_IMPLICIT_REST_NODE
ImplicitRestNode.
Definition ast.h:779
@ PM_TRUE_NODE
TrueNode.
Definition ast.h:992
@ PM_ASSOC_SPLAT_NODE
AssocSplatNode.
Definition ast.h:596
@ PM_CLASS_VARIABLE_AND_WRITE_NODE
ClassVariableAndWriteNode.
Definition ast.h:650
@ PM_RANGE_NODE
RangeNode.
Definition ast.h:929
@ PM_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE
InstanceVariableOperatorWriteNode.
Definition ast.h:800
@ PM_LOCAL_VARIABLE_READ_NODE
LocalVariableReadNode.
Definition ast.h:851
@ PM_NEXT_NODE
NextNode.
Definition ast.h:884
@ PM_INSTANCE_VARIABLE_OR_WRITE_NODE
InstanceVariableOrWriteNode.
Definition ast.h:803
@ PM_REGULAR_EXPRESSION_NODE
RegularExpressionNode.
Definition ast.h:938
@ PM_CLASS_VARIABLE_OR_WRITE_NODE
ClassVariableOrWriteNode.
Definition ast.h:656
@ PM_BLOCK_PARAMETERS_NODE
BlockParametersNode.
Definition ast.h:617
@ PM_CONSTANT_WRITE_NODE
ConstantWriteNode.
Definition ast.h:701
@ PM_HASH_PATTERN_NODE
HashPatternNode.
Definition ast.h:767
@ PM_INDEX_OPERATOR_WRITE_NODE
IndexOperatorWriteNode.
Definition ast.h:788
@ PM_UNDEF_NODE
UndefNode.
Definition ast.h:995
@ PM_ALTERNATION_PATTERN_NODE
AlternationPatternNode.
Definition ast.h:578
@ PM_ENSURE_NODE
EnsureNode.
Definition ast.h:719
@ PM_LOCAL_VARIABLE_WRITE_NODE
LocalVariableWriteNode.
Definition ast.h:857
@ PM_SINGLETON_CLASS_NODE
SingletonClassNode.
Definition ast.h:965
@ PM_KEYWORD_HASH_NODE
KeywordHashNode.
Definition ast.h:833
@ PM_PARENTHESES_NODE
ParenthesesNode.
Definition ast.h:911
@ PM_FOR_NODE
ForNode.
Definition ast.h:734
@ PM_CLASS_VARIABLE_WRITE_NODE
ClassVariableWriteNode.
Definition ast.h:665
@ PM_POST_EXECUTION_NODE
PostExecutionNode.
Definition ast.h:920
@ PM_CONSTANT_OPERATOR_WRITE_NODE
ConstantOperatorWriteNode.
Definition ast.h:671
@ PM_RETURN_NODE
ReturnNode.
Definition ast.h:959
@ PM_MODULE_NODE
ModuleNode.
Definition ast.h:875
@ PM_ARRAY_PATTERN_NODE
ArrayPatternNode.
Definition ast.h:590
@ PM_SUPER_NODE
SuperNode.
Definition ast.h:986
@ PM_MATCH_LAST_LINE_NODE
MatchLastLineNode.
Definition ast.h:860
@ PM_CONSTANT_PATH_NODE
ConstantPathNode.
Definition ast.h:680
@ PM_INTERPOLATED_SYMBOL_NODE
InterpolatedSymbolNode.
Definition ast.h:827
@ PM_CALL_AND_WRITE_NODE
CallAndWriteNode.
Definition ast.h:623
@ PM_OPTIONAL_KEYWORD_PARAMETER_NODE
OptionalKeywordParameterNode.
Definition ast.h:899
@ PM_CLASS_VARIABLE_TARGET_NODE
ClassVariableTargetNode.
Definition ast.h:662
@ PM_CASE_MATCH_NODE
CaseMatchNode.
Definition ast.h:641
@ PM_BREAK_NODE
BreakNode.
Definition ast.h:620
@ PM_CALL_OR_WRITE_NODE
CallOrWriteNode.
Definition ast.h:632
@ PM_IMAGINARY_NODE
ImaginaryNode.
Definition ast.h:773
@ PM_DEF_NODE
DefNode.
Definition ast.h:704
@ PM_CONSTANT_READ_NODE
ConstantReadNode.
Definition ast.h:695
@ PM_GLOBAL_VARIABLE_WRITE_NODE
GlobalVariableWriteNode.
Definition ast.h:761
@ PM_SOURCE_ENCODING_NODE
SourceEncodingNode.
Definition ast.h:968
@ PM_BEGIN_NODE
BeginNode.
Definition ast.h:602
@ PM_INTERPOLATED_X_STRING_NODE
InterpolatedXStringNode.
Definition ast.h:830
@ PM_INSTANCE_VARIABLE_READ_NODE
InstanceVariableReadNode.
Definition ast.h:806
@ PM_FLIP_FLOP_NODE
FlipFlopNode.
Definition ast.h:728
@ PM_PINNED_VARIABLE_NODE
PinnedVariableNode.
Definition ast.h:917
@ PM_REQUIRED_PARAMETER_NODE
RequiredParameterNode.
Definition ast.h:944
@ PM_INSTANCE_VARIABLE_WRITE_NODE
InstanceVariableWriteNode.
Definition ast.h:812
@ PM_INSTANCE_VARIABLE_TARGET_NODE
InstanceVariableTargetNode.
Definition ast.h:809
@ PM_GLOBAL_VARIABLE_AND_WRITE_NODE
GlobalVariableAndWriteNode.
Definition ast.h:746
@ PM_CASE_NODE
CaseNode.
Definition ast.h:644
@ PM_RESCUE_NODE
RescueNode.
Definition ast.h:950
@ PM_FLOAT_NODE
FloatNode.
Definition ast.h:731
@ PM_ASSOC_NODE
AssocNode.
Definition ast.h:593
@ PM_INTEGER_NODE
IntegerNode.
Definition ast.h:815
@ PM_LOCAL_VARIABLE_TARGET_NODE
LocalVariableTargetNode.
Definition ast.h:854
@ PM_STRING_NODE
StringNode.
Definition ast.h:983
@ PM_INDEX_OR_WRITE_NODE
IndexOrWriteNode.
Definition ast.h:791
@ PM_ALIAS_GLOBAL_VARIABLE_NODE
AliasGlobalVariableNode.
Definition ast.h:572
@ PM_PARAMETERS_NODE
ParametersNode.
Definition ast.h:908
@ PM_NUMBERED_REFERENCE_READ_NODE
NumberedReferenceReadNode.
Definition ast.h:896
@ PM_CONSTANT_PATH_OR_WRITE_NODE
ConstantPathOrWriteNode.
Definition ast.h:686
@ PM_GLOBAL_VARIABLE_OR_WRITE_NODE
GlobalVariableOrWriteNode.
Definition ast.h:752
@ PM_CONSTANT_OR_WRITE_NODE
ConstantOrWriteNode.
Definition ast.h:674
@ PM_STATEMENTS_NODE
StatementsNode.
Definition ast.h:980
@ PM_OPTIONAL_PARAMETER_NODE
OptionalParameterNode.
Definition ast.h:902
@ PM_PINNED_EXPRESSION_NODE
PinnedExpressionNode.
Definition ast.h:914
@ PM_BLOCK_NODE
BlockNode.
Definition ast.h:611
@ PM_CLASS_VARIABLE_OPERATOR_WRITE_NODE
ClassVariableOperatorWriteNode.
Definition ast.h:653
@ PM_REST_PARAMETER_NODE
RestParameterNode.
Definition ast.h:953
@ PM_EMBEDDED_STATEMENTS_NODE
EmbeddedStatementsNode.
Definition ast.h:713
@ PM_INTERPOLATED_REGULAR_EXPRESSION_NODE
InterpolatedRegularExpressionNode.
Definition ast.h:821
@ PM_INDEX_TARGET_NODE
IndexTargetNode.
Definition ast.h:794
@ PM_KEYWORD_REST_PARAMETER_NODE
KeywordRestParameterNode.
Definition ast.h:836
struct pm_nil_node pm_nil_node_t
NilNode.
struct pm_begin_node pm_begin_node_t
BeginNode.
struct pm_statements_node pm_statements_node_t
StatementsNode.
struct pm_instance_variable_write_node pm_instance_variable_write_node_t
InstanceVariableWriteNode.
struct pm_false_node pm_false_node_t
FalseNode.
struct pm_keyword_hash_node pm_keyword_hash_node_t
KeywordHashNode.
struct pm_return_node pm_return_node_t
ReturnNode.
struct pm_constant_path_node pm_constant_path_node_t
ConstantPathNode.
struct pm_local_variable_write_node pm_local_variable_write_node_t
LocalVariableWriteNode.
@ PM_STRING_FLAGS_FROZEN
frozen by virtue of a frozen_string_literal comment
Definition ast.h:4587
@ PM_STRING_FLAGS_FORCED_BINARY_ENCODING
internal bytes forced the encoding to binary
Definition ast.h:4584
@ PM_STRING_FLAGS_FORCED_UTF8_ENCODING
internal bytes forced the encoding to UTF-8
Definition ast.h:4581
struct pm_implicit_node pm_implicit_node_t
ImplicitNode.
struct pm_yield_node pm_yield_node_t
YieldNode.
@ PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT
if arguments contain keyword splat
Definition ast.h:4461
struct pm_local_variable_and_write_node pm_local_variable_and_write_node_t
LocalVariableAndWriteNode.
struct pm_parameters_node pm_parameters_node_t
ParametersNode.
struct pm_lambda_node pm_lambda_node_t
LambdaNode.
#define PM_NODE_FLAG_P(node, flag)
Return true if the given flag is set on the given node.
Definition ast.h:1055
struct pm_module_node pm_module_node_t
ModuleNode.
struct pm_case_node pm_case_node_t
CaseNode.
struct pm_in_node pm_in_node_t
InNode.
struct pm_if_node pm_if_node_t
IfNode.
struct pm_constant_path_write_node pm_constant_path_write_node_t
ConstantPathWriteNode.
struct pm_pre_execution_node pm_pre_execution_node_t
PreExecutionNode.
struct pm_rescue_modifier_node pm_rescue_modifier_node_t
RescueModifierNode.
struct pm_splat_node pm_splat_node_t
SplatNode.
struct pm_match_write_node pm_match_write_node_t
MatchWriteNode.
struct pm_multi_write_node pm_multi_write_node_t
MultiWriteNode.
struct pm_local_variable_operator_write_node pm_local_variable_operator_write_node_t
LocalVariableOperatorWriteNode.
struct pm_block_argument_node pm_block_argument_node_t
BlockArgumentNode.
struct pm_interpolated_x_string_node pm_interpolated_x_string_node_t
InterpolatedXStringNode.
struct pm_constant_write_node pm_constant_write_node_t
ConstantWriteNode.
struct pm_required_keyword_parameter_node pm_required_keyword_parameter_node_t
RequiredKeywordParameterNode.
#define PM_NODE_TYPE_P(node, type)
Return true if the type of the given node matches the given type.
Definition ast.h:1050
#define PM_NODE_TYPE(node)
Cast the type to an enum to allow the compiler to provide exhaustiveness checking.
Definition ast.h:1045
struct pm_no_keywords_parameter_node pm_no_keywords_parameter_node_t
NoKeywordsParameterNode.
struct pm_alias_global_variable_node pm_alias_global_variable_node_t
AliasGlobalVariableNode.
struct pm_post_execution_node pm_post_execution_node_t
PostExecutionNode.
@ PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT
if array contains splat nodes
Definition ast.h:4469
struct pm_alias_method_node pm_alias_method_node_t
AliasMethodNode.
struct pm_keyword_rest_parameter_node pm_keyword_rest_parameter_node_t
KeywordRestParameterNode.
struct pm_global_variable_read_node pm_global_variable_read_node_t
GlobalVariableReadNode.
struct pm_back_reference_read_node pm_back_reference_read_node_t
BackReferenceReadNode.
struct pm_hash_node pm_hash_node_t
HashNode.
struct pm_block_local_variable_node pm_block_local_variable_node_t
BlockLocalVariableNode.
struct pm_multi_target_node pm_multi_target_node_t
MultiTargetNode.
@ PM_INTEGER_BASE_FLAGS_HEXADECIMAL
0x prefix
Definition ast.h:4511
@ PM_INTEGER_BASE_FLAGS_OCTAL
0o or 0 prefix
Definition ast.h:4508
@ PM_INTEGER_BASE_FLAGS_DECIMAL
0d or no prefix
Definition ast.h:4505
@ PM_INTEGER_BASE_FLAGS_BINARY
0b prefix
Definition ast.h:4502
struct pm_rational_node pm_rational_node_t
RationalNode.
struct pm_class_node pm_class_node_t
ClassNode.
struct pm_pinned_expression_node pm_pinned_expression_node_t
PinnedExpressionNode.
struct pm_constant_operator_write_node pm_constant_operator_write_node_t
ConstantOperatorWriteNode.
struct pm_ensure_node pm_ensure_node_t
EnsureNode.
struct pm_index_or_write_node pm_index_or_write_node_t
IndexOrWriteNode.
struct pm_forwarding_parameter_node pm_forwarding_parameter_node_t
ForwardingParameterNode.
struct pm_constant_or_write_node pm_constant_or_write_node_t
ConstantOrWriteNode.
struct pm_index_operator_write_node pm_index_operator_write_node_t
IndexOperatorWriteNode.
struct pm_when_node pm_when_node_t
WhenNode.
enum pm_token_type pm_token_type_t
This enum represents every type of token in the Ruby source.
struct pm_super_node pm_super_node_t
SuperNode.
struct pm_range_node pm_range_node_t
RangeNode.
struct pm_and_node pm_and_node_t
AndNode.
struct pm_constant_path_and_write_node pm_constant_path_and_write_node_t
ConstantPathAndWriteNode.
struct pm_rest_parameter_node pm_rest_parameter_node_t
RestParameterNode.
#define PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
When we're serializing to Java, we want to skip serializing the location fields as they won't be used...
Definition ast.h:4610
struct pm_assoc_splat_node pm_assoc_splat_node_t
AssocSplatNode.
@ PM_CALL_NODE_FLAGS_SAFE_NAVIGATION
&.
Definition ast.h:4477
@ PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE
a call that is an attribute write, so the value being written should be returned
Definition ast.h:4483
@ PM_CALL_NODE_FLAGS_VARIABLE_CALL
a call that could have been a local variable
Definition ast.h:4480
struct pm_constant_read_node pm_constant_read_node_t
ConstantReadNode.
struct pm_match_predicate_node pm_match_predicate_node_t
MatchPredicateNode.
struct pm_or_node pm_or_node_t
OrNode.
struct pm_case_match_node pm_case_match_node_t
CaseMatchNode.
struct pm_call_and_write_node pm_call_and_write_node_t
CallAndWriteNode.
struct pm_until_node pm_until_node_t
UntilNode.
struct pm_retry_node pm_retry_node_t
RetryNode.
struct pm_imaginary_node pm_imaginary_node_t
ImaginaryNode.
struct pm_array_pattern_node pm_array_pattern_node_t
ArrayPatternNode.
struct pm_redo_node pm_redo_node_t
RedoNode.
struct pm_source_encoding_node pm_source_encoding_node_t
SourceEncodingNode.
struct pm_true_node pm_true_node_t
TrueNode.
struct pm_break_node pm_break_node_t
BreakNode.
struct pm_integer_node pm_integer_node_t
IntegerNode.
struct pm_call_operator_write_node pm_call_operator_write_node_t
CallOperatorWriteNode.
struct pm_for_node pm_for_node_t
ForNode.
struct pm_required_parameter_node pm_required_parameter_node_t
RequiredParameterNode.
struct pm_symbol_node pm_symbol_node_t
SymbolNode.
struct pm_block_parameters_node pm_block_parameters_node_t
BlockParametersNode.
struct pm_alternation_pattern_node pm_alternation_pattern_node_t
AlternationPatternNode.
struct pm_parentheses_node pm_parentheses_node_t
ParenthesesNode.
@ PM_REGULAR_EXPRESSION_FLAGS_EUC_JP
e - forces the EUC-JP encoding
Definition ast.h:4555
@ PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE
i - ignores the case of characters when matching
Definition ast.h:4543
@ PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT
n - forces the ASCII-8BIT encoding
Definition ast.h:4558
@ PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE
m - allows $ to match the end of lines within strings
Definition ast.h:4549
@ PM_REGULAR_EXPRESSION_FLAGS_EXTENDED
x - ignores whitespace and allows comments in regular expressions
Definition ast.h:4546
@ PM_REGULAR_EXPRESSION_FLAGS_ONCE
o - only interpolates values into the regular expression once
Definition ast.h:4552
@ PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J
s - forces the Windows-31J encoding
Definition ast.h:4561
@ PM_REGULAR_EXPRESSION_FLAGS_UTF_8
u - forces the UTF-8 encoding
Definition ast.h:4564
struct pm_forwarding_arguments_node pm_forwarding_arguments_node_t
ForwardingArgumentsNode.
struct pm_instance_variable_read_node pm_instance_variable_read_node_t
InstanceVariableReadNode.
struct pm_defined_node pm_defined_node_t
DefinedNode.
struct pm_interpolated_symbol_node pm_interpolated_symbol_node_t
InterpolatedSymbolNode.
struct pm_def_node pm_def_node_t
DefNode.
struct pm_singleton_class_node pm_singleton_class_node_t
SingletonClassNode.
uint16_t pm_node_flags_t
These are the flags embedded in the node struct.
Definition ast.h:1029
struct pm_capture_pattern_node pm_capture_pattern_node_t
CapturePatternNode.
struct pm_source_file_node pm_source_file_node_t
SourceFileNode.
struct pm_regular_expression_node pm_regular_expression_node_t
RegularExpressionNode.
@ PM_TOKEN_STAR_STAR
**
Definition ast.h:467
@ PM_TOKEN_DOT_DOT_DOT
the ... range operator or forwarding parameter
Definition ast.h:122
@ PM_TOKEN_MINUS_EQUAL
-=
Definition ast.h:383
@ PM_TOKEN_IGNORED_NEWLINE
an ignored newline
Definition ast.h:194
@ PM_TOKEN_BANG_EQUAL
!=
Definition ast.h:62
@ PM_TOKEN_KEYWORD___FILE__
__FILE__
Definition ast.h:347
@ PM_TOKEN_KEYWORD_WHEN
when
Definition ast.h:332
@ PM_TOKEN_FLOAT
a floating point number
Definition ast.h:158
@ PM_TOKEN_PLUS_EQUAL
+=
Definition ast.h:440
@ PM_TOKEN_DOT_DOT
the .. range operator
Definition ast.h:119
@ PM_TOKEN_UDOT_DOT
unary .. operator
Definition ast.h:494
@ PM_TOKEN_AMPERSAND_DOT
&.
Definition ast.h:47
@ PM_TOKEN_NEWLINE
a newline character outside of other tokens
Definition ast.h:389
@ PM_TOKEN_NUMBERED_REFERENCE
a numbered reference to a capture group in the previous regular expression match
Definition ast.h:392
@ PM_TOKEN_AMPERSAND
&
Definition ast.h:38
@ PM_TOKEN_KEYWORD_YIELD
yield
Definition ast.h:341
@ PM_TOKEN_KEYWORD_END
end
Definition ast.h:251
@ PM_TOKEN_LAMBDA_BEGIN
{
Definition ast.h:359
@ PM_TOKEN_KEYWORD_UNTIL_MODIFIER
until in the modifier form
Definition ast.h:329
@ PM_TOKEN_EQUAL_EQUAL_EQUAL
===
Definition ast.h:149
@ PM_TOKEN_INTEGER_RATIONAL
an integer with a rational suffix
Definition ast.h:206
@ PM_TOKEN_USTAR
unary *
Definition ast.h:509
@ PM_TOKEN_TILDE
~ or ~@
Definition ast.h:485
@ PM_TOKEN_KEYWORD___ENCODING__
__ENCODING__
Definition ast.h:344
@ PM_TOKEN_REGEXP_END
the end of a regular expression
Definition ast.h:449
@ PM_TOKEN_KEYWORD_UNTIL
until
Definition ast.h:326
@ PM_TOKEN_COMMA
,
Definition ast.h:107
@ PM_TOKEN_MAXIMUM
The maximum token value.
Definition ast.h:521
@ PM_TOKEN_GREATER
>
Definition ast.h:173
@ PM_TOKEN_INTEGER
an integer (any base)
Definition ast.h:200
@ PM_TOKEN_SLASH_EQUAL
/=
Definition ast.h:458
@ PM_TOKEN_UMINUS_NUM
-@ for a number
Definition ast.h:503
@ PM_TOKEN_EMBVAR
#
Definition ast.h:140
@ PM_TOKEN_KEYWORD_UNLESS_MODIFIER
unless in the modifier form
Definition ast.h:323
@ PM_TOKEN_INTEGER_RATIONAL_IMAGINARY
an integer with a rational and imaginary suffix
Definition ast.h:209
@ PM_TOKEN_FLOAT_RATIONAL_IMAGINARY
a floating point number with a rational and imaginary suffix
Definition ast.h:167
@ PM_TOKEN_BRACKET_LEFT_RIGHT
[]
Definition ast.h:80
@ PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL
&&=
Definition ast.h:44
@ PM_TOKEN_KEYWORD_CLASS
class
Definition ast.h:230
@ PM_TOKEN_KEYWORD_BEGIN
begin
Definition ast.h:218
@ PM_TOKEN_NOT_PROVIDED
a token that was not present but it is okay
Definition ast.h:35
@ PM_TOKEN_USTAR_STAR
unary **
Definition ast.h:512
@ PM_TOKEN_GREATER_GREATER_EQUAL
>>=
Definition ast.h:182
@ PM_TOKEN_PERCENT_EQUAL
%=
Definition ast.h:407
@ PM_TOKEN_PERCENT
%
Definition ast.h:404
@ PM_TOKEN_KEYWORD_IN
in
Definition ast.h:272
@ PM_TOKEN_BANG
! or !@
Definition ast.h:59
@ PM_TOKEN_KEYWORD_NOT
not
Definition ast.h:284
@ PM_TOKEN_BRACKET_LEFT_ARRAY
[ for the beginning of an array
Definition ast.h:77
@ PM_TOKEN_HEREDOC_END
the end of a heredoc
Definition ast.h:185
@ PM_TOKEN_HEREDOC_START
the start of a heredoc
Definition ast.h:188
@ PM_TOKEN_KEYWORD_DEFINED
defined?
Definition ast.h:236
@ PM_TOKEN_UCOLON_COLON
unary ::
Definition ast.h:491
@ PM_TOKEN_LABEL_END
the end of a label
Definition ast.h:356
@ PM_TOKEN_EQUAL_GREATER
=>
Definition ast.h:152
@ PM_TOKEN_KEYWORD_UNLESS
unless
Definition ast.h:320
@ PM_TOKEN_KEYWORD_ENSURE
ensure
Definition ast.h:257
@ PM_TOKEN_AMPERSAND_EQUAL
&=
Definition ast.h:50
@ PM_TOKEN_EQUAL_EQUAL
==
Definition ast.h:146
@ PM_TOKEN_UPLUS
+@
Definition ast.h:506
@ PM_TOKEN_FLOAT_IMAGINARY
a floating point number with an imaginary suffix
Definition ast.h:161
@ PM_TOKEN_KEYWORD_BEGIN_UPCASE
BEGIN.
Definition ast.h:221
@ PM_TOKEN_LESS_EQUAL_GREATER
<=>
Definition ast.h:368
@ PM_TOKEN_KEYWORD_RESCUE_MODIFIER
rescue in the modifier form
Definition ast.h:296
@ PM_TOKEN_MISSING
a token that was expected but not found
Definition ast.h:32
@ PM_TOKEN_MINUS_GREATER
->
Definition ast.h:386
@ PM_TOKEN_KEYWORD_FALSE
false
Definition ast.h:260
@ PM_TOKEN_PIPE_PIPE_EQUAL
||=
Definition ast.h:434
@ PM_TOKEN_KEYWORD_IF
if
Definition ast.h:266
@ PM_TOKEN_EMBEXPR_BEGIN
#{
Definition ast.h:134
@ PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES
( for a parentheses node
Definition ast.h:398
@ PM_TOKEN_EMBDOC_END
=end
Definition ast.h:128
@ PM_TOKEN_KEYWORD_ELSE
else
Definition ast.h:245
@ PM_TOKEN_BACK_REFERENCE
a back reference
Definition ast.h:56
@ PM_TOKEN_BRACKET_LEFT
[
Definition ast.h:74
@ PM_TOKEN_EOF
final token in the file
Definition ast.h:29
@ PM_TOKEN_PIPE_PIPE
||
Definition ast.h:431
@ PM_TOKEN_KEYWORD_NIL
nil
Definition ast.h:281
@ PM_TOKEN_PERCENT_UPPER_W
%W
Definition ast.h:422
@ PM_TOKEN_KEYWORD_RETURN
return
Definition ast.h:302
@ PM_TOKEN_CLASS_VARIABLE
a class variable
Definition ast.h:98
@ PM_TOKEN_PIPE
|
Definition ast.h:425
@ PM_TOKEN_PARENTHESIS_LEFT
(
Definition ast.h:395
@ PM_TOKEN_BANG_TILDE
!~
Definition ast.h:65
@ PM_TOKEN_DOT
the . call operator
Definition ast.h:116
@ PM_TOKEN_PARENTHESIS_RIGHT
)
Definition ast.h:401
@ PM_TOKEN_KEYWORD_RESCUE
rescue
Definition ast.h:293
@ PM_TOKEN_INSTANCE_VARIABLE
an instance variable
Definition ast.h:197
@ PM_TOKEN_PIPE_EQUAL
|=
Definition ast.h:428
@ PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL
[]=
Definition ast.h:83
@ PM_TOKEN_UAMPERSAND
unary &
Definition ast.h:488
@ PM_TOKEN_MINUS
-
Definition ast.h:380
@ PM_TOKEN_CONSTANT
a constant
Definition ast.h:113
@ PM_TOKEN_IDENTIFIER
an identifier
Definition ast.h:191
@ PM_TOKEN_EMBDOC_BEGIN
=begin
Definition ast.h:125
@ PM_TOKEN_STAR_EQUAL
*=
Definition ast.h:464
@ PM_TOKEN_KEYWORD_OR
or
Definition ast.h:287
@ PM_TOKEN_KEYWORD_AND
and
Definition ast.h:215
@ PM_TOKEN_LESS
<
Definition ast.h:362
@ PM_TOKEN_KEYWORD_BREAK
break
Definition ast.h:224
@ PM_TOKEN_PERCENT_LOWER_W
%w
Definition ast.h:413
@ PM_TOKEN_SYMBOL_BEGIN
the beginning of a symbol
Definition ast.h:482
@ PM_TOKEN_METHOD_NAME
a method name
Definition ast.h:377
@ PM_TOKEN_KEYWORD_CASE
case
Definition ast.h:227
@ PM_TOKEN_WORDS_SEP
a separator between words in a list
Definition ast.h:515
@ PM_TOKEN_FLOAT_RATIONAL
a floating point number with a rational suffix
Definition ast.h:164
@ PM_TOKEN_LESS_LESS_EQUAL
<<=
Definition ast.h:374
@ PM_TOKEN_EMBDOC_LINE
a line inside of embedded documentation
Definition ast.h:131
@ PM_TOKEN_KEYWORD_SUPER
super
Definition ast.h:308
@ PM_TOKEN_KEYWORD_DO
do
Definition ast.h:239
@ PM_TOKEN_KEYWORD_REDO
redo
Definition ast.h:290
@ PM_TOKEN_EQUAL_TILDE
=~
Definition ast.h:155
@ PM_TOKEN_EMBEXPR_END
}
Definition ast.h:137
@ PM_TOKEN_KEYWORD_END_UPCASE
END.
Definition ast.h:254
@ PM_TOKEN_KEYWORD___LINE__
__LINE__
Definition ast.h:350
@ PM_TOKEN_STRING_END
the end of a string
Definition ast.h:479
@ PM_TOKEN_STRING_CONTENT
the contents of a string
Definition ast.h:476
@ PM_TOKEN_BRACE_LEFT
{
Definition ast.h:68
@ PM_TOKEN_COLON_COLON
::
Definition ast.h:104
@ PM_TOKEN_GREATER_GREATER
>>
Definition ast.h:179
@ PM_TOKEN_PERCENT_LOWER_X
%x
Definition ast.h:416
@ PM_TOKEN_KEYWORD_SELF
self
Definition ast.h:305
@ PM_TOKEN_PERCENT_LOWER_I
%i
Definition ast.h:410
@ PM_TOKEN_KEYWORD_ALIAS
alias
Definition ast.h:212
@ PM_TOKEN_GLOBAL_VARIABLE
a global variable
Definition ast.h:170
@ PM_TOKEN_KEYWORD_IF_MODIFIER
if in the modifier form
Definition ast.h:269
@ PM_TOKEN_SLASH
/
Definition ast.h:455
@ PM_TOKEN_KEYWORD_RETRY
retry
Definition ast.h:299
@ PM_TOKEN_COLON
:
Definition ast.h:101
@ PM_TOKEN_KEYWORD_UNDEF
undef
Definition ast.h:317
@ PM_TOKEN_BRACKET_RIGHT
]
Definition ast.h:86
@ PM_TOKEN_KEYWORD_FOR
for
Definition ast.h:263
@ PM_TOKEN_KEYWORD_THEN
then
Definition ast.h:311
@ PM_TOKEN_QUESTION_MARK
?
Definition ast.h:443
@ PM_TOKEN___END__
marker for the point in the file at which the parser should stop
Definition ast.h:518
@ PM_TOKEN_KEYWORD_WHILE
while
Definition ast.h:335
@ PM_TOKEN_EQUAL
=
Definition ast.h:143
@ PM_TOKEN_KEYWORD_DEF
def
Definition ast.h:233
@ PM_TOKEN_UDOT_DOT_DOT
unary ... operator
Definition ast.h:497
@ PM_TOKEN_STAR
*
Definition ast.h:461
@ PM_TOKEN_KEYWORD_WHILE_MODIFIER
while in the modifier form
Definition ast.h:338
@ PM_TOKEN_KEYWORD_TRUE
true
Definition ast.h:314
@ PM_TOKEN_BRACE_RIGHT
}
Definition ast.h:71
@ PM_TOKEN_SEMICOLON
;
Definition ast.h:452
@ PM_TOKEN_REGEXP_BEGIN
the beginning of a regular expression
Definition ast.h:446
@ PM_TOKEN_CARET
^
Definition ast.h:89
@ PM_TOKEN_PERCENT_UPPER_I
%I
Definition ast.h:419
@ PM_TOKEN_KEYWORD_DO_LOOP
do keyword for a predicate in a while, until, or for loop
Definition ast.h:242
@ PM_TOKEN_KEYWORD_MODULE
module
Definition ast.h:275
@ PM_TOKEN_PLUS
+
Definition ast.h:437
@ PM_TOKEN_KEYWORD_NEXT
next
Definition ast.h:278
@ PM_TOKEN_BACKTICK
`
Definition ast.h:53
@ PM_TOKEN_INTEGER_IMAGINARY
an integer with an imaginary suffix
Definition ast.h:203
@ PM_TOKEN_LABEL
a label
Definition ast.h:353
@ PM_TOKEN_STAR_STAR_EQUAL
**=
Definition ast.h:470
@ PM_TOKEN_CHARACTER_LITERAL
a character literal
Definition ast.h:95
@ PM_TOKEN_AMPERSAND_AMPERSAND
&&
Definition ast.h:41
@ PM_TOKEN_UMINUS
-@
Definition ast.h:500
@ PM_TOKEN_LESS_LESS
<<
Definition ast.h:371
@ PM_TOKEN_GREATER_EQUAL
>=
Definition ast.h:176
@ PM_TOKEN_COMMENT
a comment
Definition ast.h:110
@ PM_TOKEN_CARET_EQUAL
^=
Definition ast.h:92
@ PM_TOKEN_KEYWORD_ELSIF
elsif
Definition ast.h:248
@ PM_TOKEN_STRING_BEGIN
the beginning of a string
Definition ast.h:473
@ PM_TOKEN_LESS_EQUAL
<=
Definition ast.h:365
struct pm_global_variable_or_write_node pm_global_variable_or_write_node_t
GlobalVariableOrWriteNode.
struct pm_rescue_node pm_rescue_node_t
RescueNode.
struct pm_array_node pm_array_node_t
ArrayNode.
struct pm_while_node pm_while_node_t
WhileNode.
struct pm_global_variable_write_node pm_global_variable_write_node_t
GlobalVariableWriteNode.
struct pm_instance_variable_or_write_node pm_instance_variable_or_write_node_t
InstanceVariableOrWriteNode.
struct pm_source_line_node pm_source_line_node_t
SourceLineNode.
@ PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING
internal bytes forced the encoding to UTF-8
Definition ast.h:4491
struct pm_numbered_parameters_node pm_numbered_parameters_node_t
NumberedParametersNode.
struct pm_class_variable_operator_write_node pm_class_variable_operator_write_node_t
ClassVariableOperatorWriteNode.
struct pm_next_node pm_next_node_t
NextNode.
struct pm_unless_node pm_unless_node_t
UnlessNode.
struct pm_interpolated_regular_expression_node pm_interpolated_regular_expression_node_t
InterpolatedRegularExpressionNode.
struct pm_string_node pm_string_node_t
StringNode.
struct pm_float_node pm_float_node_t
FloatNode.
struct pm_global_variable_operator_write_node pm_global_variable_operator_write_node_t
GlobalVariableOperatorWriteNode.
struct pm_instance_variable_operator_write_node pm_instance_variable_operator_write_node_t
InstanceVariableOperatorWriteNode.
@ PM_LOOP_FLAGS_BEGIN_MODIFIER
a loop after a begin statement, so the body is executed first before the condition
Definition ast.h:4527
struct pm_pinned_variable_node pm_pinned_variable_node_t
PinnedVariableNode.
struct pm_instance_variable_and_write_node pm_instance_variable_and_write_node_t
InstanceVariableAndWriteNode.
struct pm_program_node pm_program_node_t
ProgramNode.
struct pm_find_pattern_node pm_find_pattern_node_t
FindPatternNode.
pm_diagnostic_id_t
The diagnostic IDs of all of the diagnostics, used to communicate the types of errors between the par...
Definition diagnostic.h:44
VALUE type(ANYARGS)
ANYARGS-ed function type.
pm_heredoc_indent_t
The type of indentation that a heredoc uses.
Definition parser.h:84
struct pm_context_node pm_context_node_t
This is a node in a linked list of contexts.
#define PM_LEX_STACK_SIZE
We pre-allocate a certain number of lex states in order to avoid having to call malloc too many times...
Definition parser.h:254
struct pm_parser pm_parser_t
The parser used to parse Ruby source.
Definition parser.h:259
struct pm_comment pm_comment_t
This is a node in the linked list of comments that we've found while parsing.
pm_lex_state_t
This enum combines the various bits from the above enum into individual values that represent the var...
Definition parser.h:51
struct pm_scope pm_scope_t
This struct represents a node in a linked list of scopes.
pm_heredoc_quote_t
The type of quote that a heredoc uses.
Definition parser.h:74
void(* pm_encoding_changed_callback_t)(pm_parser_t *parser)
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:425
pm_context_t
While parsing, we keep track of a stack of contexts.
Definition parser.h:266
@ PM_CONTEXT_ELSIF
an elsif clause
Definition parser.h:298
@ PM_CONTEXT_ENSURE_DEF
an ensure statement within a method definition
Definition parser.h:307
@ PM_CONTEXT_ELSE
an else clause
Definition parser.h:295
@ PM_CONTEXT_FOR_INDEX
a for loop's index
Definition parser.h:313
@ PM_CONTEXT_CASE_WHEN
a case when statement
Definition parser.h:277
@ PM_CONTEXT_MODULE
a module declaration
Definition parser.h:328
@ PM_CONTEXT_DEF_PARAMS
a method definition's parameters
Definition parser.h:289
@ PM_CONTEXT_CASE_IN
a case in statement
Definition parser.h:280
@ PM_CONTEXT_SCLASS
a singleton class definition
Definition parser.h:355
@ PM_CONTEXT_UNLESS
an unless statement
Definition parser.h:358
@ PM_CONTEXT_POSTEXE
an END block
Definition parser.h:334
@ PM_CONTEXT_IF
an if statement
Definition parser.h:316
@ PM_CONTEXT_RESCUE_DEF
a rescue statement within a method definition
Definition parser.h:352
@ PM_CONTEXT_RESCUE
a rescue statement
Definition parser.h:349
@ PM_CONTEXT_LAMBDA_BRACES
a lambda expression with braces
Definition parser.h:319
@ PM_CONTEXT_PARENS
a parenthesized expression
Definition parser.h:331
@ PM_CONTEXT_BLOCK_BRACES
expressions in block arguments using braces
Definition parser.h:271
@ PM_CONTEXT_PREEXE
a BEGIN block
Definition parser.h:340
@ PM_CONTEXT_RESCUE_ELSE
a rescue else statement
Definition parser.h:343
@ PM_CONTEXT_UNTIL
an until statement
Definition parser.h:361
@ PM_CONTEXT_FOR
a for loop
Definition parser.h:310
@ PM_CONTEXT_PREDICATE
a predicate inside an if/elsif/unless statement
Definition parser.h:337
@ PM_CONTEXT_RESCUE_ELSE_DEF
a rescue else statement within a method definition
Definition parser.h:346
@ PM_CONTEXT_DEFAULT_PARAMS
a method definition's default parameter
Definition parser.h:292
@ PM_CONTEXT_CLASS
a class declaration
Definition parser.h:283
@ PM_CONTEXT_MAIN
the top level context
Definition parser.h:325
@ PM_CONTEXT_LAMBDA_DO_END
a lambda expression with do..end
Definition parser.h:322
@ PM_CONTEXT_BEGIN
a begin statement
Definition parser.h:268
@ PM_CONTEXT_ENSURE
an ensure statement
Definition parser.h:304
@ PM_CONTEXT_EMBEXPR
an interpolated expression
Definition parser.h:301
@ PM_CONTEXT_DEF
a method definition
Definition parser.h:286
@ PM_CONTEXT_BLOCK_KEYWORDS
expressions in block arguments using do..end
Definition parser.h:274
@ PM_CONTEXT_WHILE
a while statement
Definition parser.h:364
#define PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS
Temporary alias for the PM_NODE_FLAG_STATIC_KEYS flag.
Definition parser.h:24
pm_comment_type_t
This is the type of a comment that we've found while parsing.
Definition parser.h:377
uint32_t pm_constant_id_t
A constant id is a unique identifier for a constant in the constant pool.
#define PM_STRING_EMPTY
Defines an empty string.
Definition pm_string.h:65
#define PRISM_ATTRIBUTE_UNUSED
GCC will warn if you specify a function or parameter that is unused at runtime.
Definition defines.h:56
#define PM_STATIC_ASSERT(line, condition, message)
We want to be able to use static assertions, but they weren't standardized until C11.
Definition defines.h:91
#define PRISM_EXPORTED_FUNCTION
By default, we compile with -fvisibility=hidden.
Definition defines.h:32
#define PM_ENCODING_US_ASCII_ENTRY
This is the US-ASCII encoding.
Definition encoding.h:236
#define PM_ENCODING_UTF_8_ENTRY
This is the default UTF-8 encoding.
Definition encoding.h:229
#define PRISM_ENCODING_ALPHABETIC_BIT
All of the lookup tables use the first bit of each embedded byte to indicate whether the codepoint is...
Definition encoding.h:68
#define PRISM_ENCODING_ALPHANUMERIC_BIT
All of the lookup tables use the second bit of each embedded byte to indicate whether the codepoint i...
Definition encoding.h:74
#define PRISM_VERSION
The version of the Prism library as a constant string.
Definition version.h:27
#define PRISM_VERSION_PATCH
The patch version of the Prism library as an int.
Definition version.h:22
#define PRISM_VERSION_MINOR
The minor version of the Prism library as an int.
Definition version.h:17
#define PRISM_VERSION_MAJOR
The major version of the Prism library as an int.
Definition version.h:12
The main header file for the prism parser.
void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer)
Serialize the encoding, metadata, nodes, and constant pool.
Definition serialize.c:1934
void pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer)
Serialize the name of the encoding to the buffer.
Definition serialize.c:1923
PRISM_EXPORTED_FUNCTION const char * pm_token_type_to_str(pm_token_type_t token_type)
Returns a string representation of the given token type.
Definition token_type.c:16
void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer)
Serialize the given list of comments to the given buffer.
Definition serialize.c:1858
#define errno
Ractor-aware version of errno.
Definition ruby.h:388
AliasGlobalVariableNode.
Definition ast.h:1088
AliasMethodNode.
Definition ast.h:1109
AlternationPatternNode.
Definition ast.h:1130
AndNode.
Definition ast.h:1151
struct pm_node * left
AndNode#left.
Definition ast.h:1156
struct pm_node * right
AndNode#right.
Definition ast.h:1159
ArgumentsNode.
Definition ast.h:1174
pm_node_t base
The embedded base node.
Definition ast.h:1176
struct pm_node_list arguments
ArgumentsNode#arguments.
Definition ast.h:1179
This is a special out parameter to the parse_arguments_list function that includes opening and closin...
Definition prism.c:804
pm_node_t * block
The optional block attached to the call.
Definition prism.c:815
pm_location_t opening_loc
The optional location of the opening parenthesis or bracket.
Definition prism.c:806
pm_arguments_node_t * arguments
The lazily-allocated optional arguments node.
Definition prism.c:809
pm_location_t closing_loc
The optional location of the closing parenthesis or bracket.
Definition prism.c:812
ArrayNode.
Definition ast.h:1191
pm_node_t base
The embedded base node.
Definition ast.h:1193
pm_location_t closing_loc
ArrayNode#closing_loc.
Definition ast.h:1202
struct pm_node_list elements
ArrayNode#elements.
Definition ast.h:1196
pm_location_t opening_loc
ArrayNode#opening_loc.
Definition ast.h:1199
ArrayPatternNode.
Definition ast.h:1212
struct pm_node_list requireds
ArrayPatternNode#requireds.
Definition ast.h:1220
struct pm_node * rest
ArrayPatternNode#rest.
Definition ast.h:1223
struct pm_node * constant
ArrayPatternNode#constant.
Definition ast.h:1217
pm_location_t opening_loc
ArrayPatternNode#opening_loc.
Definition ast.h:1229
pm_node_t base
The embedded base node.
Definition ast.h:1214
pm_location_t closing_loc
ArrayPatternNode#closing_loc.
Definition ast.h:1232
struct pm_node_list posts
ArrayPatternNode#posts.
Definition ast.h:1226
AssocNode.
Definition ast.h:1242
pm_node_t base
The embedded base node.
Definition ast.h:1244
struct pm_node * value
AssocNode#value.
Definition ast.h:1250
struct pm_node * key
AssocNode#key.
Definition ast.h:1247
AssocSplatNode.
Definition ast.h:1263
BackReferenceReadNode.
Definition ast.h:1281
BeginNode.
Definition ast.h:1296
struct pm_ensure_node * ensure_clause
BeginNode#ensure_clause.
Definition ast.h:1313
struct pm_rescue_node * rescue_clause
BeginNode#rescue_clause.
Definition ast.h:1307
struct pm_statements_node * statements
BeginNode#statements.
Definition ast.h:1304
pm_node_t base
The embedded base node.
Definition ast.h:1298
pm_location_t end_keyword_loc
BeginNode#end_keyword_loc.
Definition ast.h:1316
pm_location_t begin_keyword_loc
BeginNode#begin_keyword_loc.
Definition ast.h:1301
struct pm_else_node * else_clause
BeginNode#else_clause.
Definition ast.h:1310
This struct represents a set of binding powers used for a given token.
Definition prism.c:10218
bool binary
Whether or not this token can be used as a binary operator.
Definition prism.c:10226
pm_binding_power_t left
The left binding power.
Definition prism.c:10220
bool nonassoc
Whether or not this token can be used as a non-associative binary operator.
Definition prism.c:10232
pm_binding_power_t right
The right binding power.
Definition prism.c:10223
BlockArgumentNode.
Definition ast.h:1326
BlockLocalVariableNode.
Definition ast.h:1344
pm_node_t base
The embedded base node.
Definition ast.h:1346
BlockNode.
Definition ast.h:1359
pm_node_t base
The embedded base node.
Definition ast.h:1361
BlockParameterNode.
Definition ast.h:1389
BlockParametersNode.
Definition ast.h:1410
pm_node_t base
The embedded base node.
Definition ast.h:1412
struct pm_node_list locals
BlockParametersNode#locals.
Definition ast.h:1418
pm_location_t closing_loc
BlockParametersNode#closing_loc.
Definition ast.h:1424
BreakNode.
Definition ast.h:1434
A pm_buffer_t is a simple memory buffer that stores data in a contiguous block of memory.
Definition pm_buffer.h:21
size_t length
The length of the buffer in bytes.
Definition pm_buffer.h:23
char * value
A pointer to the start of the buffer.
Definition pm_buffer.h:29
CallAndWriteNode.
Definition ast.h:1456
pm_constant_id_t read_name
CallAndWriteNode#read_name.
Definition ast.h:1470
pm_constant_id_t write_name
CallAndWriteNode#write_name.
Definition ast.h:1473
CallNode.
Definition ast.h:1493
pm_location_t opening_loc
CallNode#opening_loc.
Definition ast.h:1510
pm_location_t closing_loc
CallNode#closing_loc.
Definition ast.h:1516
struct pm_node * receiver
CallNode#receiver.
Definition ast.h:1498
pm_constant_id_t name
CallNode#name.
Definition ast.h:1504
pm_node_t base
The embedded base node.
Definition ast.h:1495
pm_location_t call_operator_loc
CallNode#call_operator_loc.
Definition ast.h:1501
pm_location_t message_loc
CallNode#message_loc.
Definition ast.h:1507
struct pm_arguments_node * arguments
CallNode#arguments.
Definition ast.h:1513
struct pm_node * block
CallNode#block.
Definition ast.h:1519
CallOperatorWriteNode.
Definition ast.h:1533
pm_constant_id_t read_name
CallOperatorWriteNode#read_name.
Definition ast.h:1547
pm_constant_id_t write_name
CallOperatorWriteNode#write_name.
Definition ast.h:1550
CallOrWriteNode.
Definition ast.h:1573
pm_constant_id_t write_name
CallOrWriteNode#write_name.
Definition ast.h:1590
pm_constant_id_t read_name
CallOrWriteNode#read_name.
Definition ast.h:1587
CallTargetNode.
Definition ast.h:1610
CapturePatternNode.
Definition ast.h:1634
CaseMatchNode.
Definition ast.h:1655
pm_location_t end_keyword_loc
CaseMatchNode#end_keyword_loc.
Definition ast.h:1672
struct pm_node_list conditions
CaseMatchNode#conditions.
Definition ast.h:1663
struct pm_else_node * consequent
CaseMatchNode#consequent.
Definition ast.h:1666
pm_node_t base
The embedded base node.
Definition ast.h:1657
CaseNode.
Definition ast.h:1682
struct pm_else_node * consequent
CaseNode#consequent.
Definition ast.h:1693
struct pm_node_list conditions
CaseNode#conditions.
Definition ast.h:1690
pm_node_t base
The embedded base node.
Definition ast.h:1684
pm_location_t end_keyword_loc
CaseNode#end_keyword_loc.
Definition ast.h:1699
ClassNode.
Definition ast.h:1709
ClassVariableAndWriteNode.
Definition ast.h:1745
ClassVariableOperatorWriteNode.
Definition ast.h:1769
ClassVariableOrWriteNode.
Definition ast.h:1796
ClassVariableReadNode.
Definition ast.h:1820
pm_node_t base
The embedded base node.
Definition ast.h:1822
pm_constant_id_t name
ClassVariableReadNode#name.
Definition ast.h:1825
ClassVariableTargetNode.
Definition ast.h:1835
ClassVariableWriteNode.
Definition ast.h:1850
This is a node in the linked list of comments that we've found while parsing.
Definition parser.h:387
pm_location_t location
The location of the comment in the source.
Definition parser.h:392
ConstantAndWriteNode.
Definition ast.h:1874
A list of constant IDs.
size_t size
The number of constant ids in the list.
ConstantOperatorWriteNode.
Definition ast.h:1898
ConstantOrWriteNode.
Definition ast.h:1925
ConstantPathAndWriteNode.
Definition ast.h:1949
ConstantPathNode.
Definition ast.h:1970
pm_node_t base
The embedded base node.
Definition ast.h:1972
ConstantPathOperatorWriteNode.
Definition ast.h:1991
ConstantPathOrWriteNode.
Definition ast.h:2015
ConstantPathTargetNode.
Definition ast.h:2036
ConstantPathWriteNode.
Definition ast.h:2057
ConstantReadNode.
Definition ast.h:2078
pm_node_t base
The embedded base node.
Definition ast.h:2080
pm_constant_id_t name
ConstantReadNode#name.
Definition ast.h:2083
A constant in the pool which effectively stores a string.
size_t length
The length of the string.
const uint8_t * start
A pointer to the start of the string.
ConstantTargetNode.
Definition ast.h:2093
ConstantWriteNode.
Definition ast.h:2108
This is a node in a linked list of contexts.
Definition parser.h:368
pm_context_t context
The context that this node represents.
Definition parser.h:370
struct pm_context_node * prev
A pointer to the previous context in the linked list.
Definition parser.h:373
DefNode.
Definition ast.h:2132
DefinedNode.
Definition ast.h:2183
ElseNode.
Definition ast.h:2207
struct pm_statements_node * statements
ElseNode#statements.
Definition ast.h:2215
pm_node_t base
The embedded base node.
Definition ast.h:2209
pm_location_t end_keyword_loc
ElseNode#end_keyword_loc.
Definition ast.h:2218
EmbeddedStatementsNode.
Definition ast.h:2228
EmbeddedVariableNode.
Definition ast.h:2249
size_t(* alpha_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphab...
Definition encoding.h:36
size_t(* char_width)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding.
Definition encoding.h:29
bool(* isupper_char)(const uint8_t *b, ptrdiff_t n)
Return true if the next character is valid in the encoding and is an uppercase character.
Definition encoding.h:50
const char * name
The name of the encoding.
Definition encoding.h:56
size_t(* alnum_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphan...
Definition encoding.h:43
EnsureNode.
Definition ast.h:2267
pm_node_t base
The embedded base node.
Definition ast.h:2269
FalseNode.
Definition ast.h:2288
FindPatternNode.
Definition ast.h:2300
struct pm_node * constant
FindPatternNode#constant.
Definition ast.h:2305
pm_location_t opening_loc
FindPatternNode#opening_loc.
Definition ast.h:2317
pm_node_t base
The embedded base node.
Definition ast.h:2302
struct pm_node_list requireds
FindPatternNode#requireds.
Definition ast.h:2311
pm_location_t closing_loc
FindPatternNode#closing_loc.
Definition ast.h:2320
FlipFlopNode.
Definition ast.h:2332
FloatNode.
Definition ast.h:2353
ForNode.
Definition ast.h:2365
ForwardingArgumentsNode.
Definition ast.h:2398
ForwardingParameterNode.
Definition ast.h:2410
ForwardingSuperNode.
Definition ast.h:2422
GlobalVariableAndWriteNode.
Definition ast.h:2437
GlobalVariableOperatorWriteNode.
Definition ast.h:2461
GlobalVariableOrWriteNode.
Definition ast.h:2488
GlobalVariableReadNode.
Definition ast.h:2512
GlobalVariableTargetNode.
Definition ast.h:2527
GlobalVariableWriteNode.
Definition ast.h:2542
HashNode.
Definition ast.h:2566
struct pm_node_list elements
HashNode#elements.
Definition ast.h:2574
pm_node_t base
The embedded base node.
Definition ast.h:2568
pm_location_t closing_loc
HashNode#closing_loc.
Definition ast.h:2577
HashPatternNode.
Definition ast.h:2587
struct pm_node_list elements
HashPatternNode#elements.
Definition ast.h:2595
pm_location_t opening_loc
HashPatternNode#opening_loc.
Definition ast.h:2601
pm_node_t base
The embedded base node.
Definition ast.h:2589
pm_location_t closing_loc
HashPatternNode#closing_loc.
Definition ast.h:2604
struct pm_node * constant
HashPatternNode#constant.
Definition ast.h:2592
IfNode.
Definition ast.h:2614
struct pm_node * consequent
IfNode#consequent.
Definition ast.h:2631
pm_location_t end_keyword_loc
IfNode#end_keyword_loc.
Definition ast.h:2634
struct pm_statements_node * statements
IfNode#statements.
Definition ast.h:2628
pm_node_t base
The embedded base node.
Definition ast.h:2616
ImaginaryNode.
Definition ast.h:2644
ImplicitNode.
Definition ast.h:2659
ImplicitRestNode.
Definition ast.h:2674
InNode.
Definition ast.h:2686
IndexAndWriteNode.
Definition ast.h:2714
IndexOperatorWriteNode.
Definition ast.h:2754
IndexOrWriteNode.
Definition ast.h:2797
IndexTargetNode.
Definition ast.h:2837
InstanceVariableAndWriteNode.
Definition ast.h:2864
InstanceVariableOperatorWriteNode.
Definition ast.h:2888
InstanceVariableOrWriteNode.
Definition ast.h:2915
InstanceVariableReadNode.
Definition ast.h:2939
pm_constant_id_t name
InstanceVariableReadNode#name.
Definition ast.h:2944
pm_node_t base
The embedded base node.
Definition ast.h:2941
InstanceVariableTargetNode.
Definition ast.h:2954
InstanceVariableWriteNode.
Definition ast.h:2969
IntegerNode.
Definition ast.h:2998
InterpolatedMatchLastLineNode.
Definition ast.h:3022
InterpolatedRegularExpressionNode.
Definition ast.h:3055
struct pm_node_list parts
InterpolatedRegularExpressionNode#parts.
Definition ast.h:3063
pm_node_t base
The embedded base node.
Definition ast.h:3057
pm_location_t closing_loc
InterpolatedRegularExpressionNode#closing_loc.
Definition ast.h:3066
InterpolatedStringNode.
Definition ast.h:3076
pm_node_t base
The embedded base node.
Definition ast.h:3078
pm_location_t closing_loc
InterpolatedStringNode#closing_loc.
Definition ast.h:3087
pm_location_t opening_loc
InterpolatedStringNode#opening_loc.
Definition ast.h:3081
struct pm_node_list parts
InterpolatedStringNode#parts.
Definition ast.h:3084
InterpolatedSymbolNode.
Definition ast.h:3097
struct pm_node_list parts
InterpolatedSymbolNode#parts.
Definition ast.h:3105
pm_location_t opening_loc
InterpolatedSymbolNode#opening_loc.
Definition ast.h:3102
pm_node_t base
The embedded base node.
Definition ast.h:3099
InterpolatedXStringNode.
Definition ast.h:3118
pm_location_t opening_loc
InterpolatedXStringNode#opening_loc.
Definition ast.h:3123
pm_location_t closing_loc
InterpolatedXStringNode#closing_loc.
Definition ast.h:3129
pm_node_t base
The embedded base node.
Definition ast.h:3120
struct pm_node_list parts
InterpolatedXStringNode#parts.
Definition ast.h:3126
KeywordHashNode.
Definition ast.h:3141
pm_node_t base
The embedded base node.
Definition ast.h:3143
struct pm_node_list elements
KeywordHashNode#elements.
Definition ast.h:3146
KeywordRestParameterNode.
Definition ast.h:3156
LambdaNode.
Definition ast.h:3177
void(* callback)(void *data, pm_parser_t *parser, pm_token_t *token)
This is the callback that is called when a token is lexed.
Definition parser.h:447
void * data
This opaque pointer is used to provide whatever information the user deemed necessary to the callback...
Definition parser.h:441
When lexing Ruby source, the lexer has a small amount of state to tell which kind of token it is curr...
Definition parser.h:97
uint8_t terminator
This is the terminator of the list literal.
Definition parser.h:153
const uint8_t * ident_start
A pointer to the start of the heredoc identifier.
Definition parser.h:219
size_t nesting
This keeps track of the nesting level of the list.
Definition parser.h:141
bool interpolation
Whether or not interpolation is allowed in this list.
Definition parser.h:144
pm_heredoc_indent_t indent
The type of indentation that the heredoc uses.
Definition parser.h:228
uint8_t incrementor
When lexing a list, it takes into account balancing the terminator if the terminator is one of (),...
Definition parser.h:150
struct pm_lex_mode * prev
The previous lex state so that it knows how to pop.
Definition parser.h:246
pm_heredoc_quote_t quote
The type of quote that the heredoc uses.
Definition parser.h:225
bool label_allowed
Whether or not at the end of the string we should allow a :, which would indicate this was a dynamic ...
Definition parser.h:196
union pm_lex_mode::@91 as
The data associated with this type of lex mode.
const uint8_t * next_start
This is the pointer to the character where lexing should resume once the heredoc has been completely ...
Definition parser.h:234
size_t ident_length
The length of the heredoc identifier.
Definition parser.h:222
size_t common_whitespace
This is used to track the amount of common whitespace on each line so that we know how much to dedent...
Definition parser.h:241
enum pm_lex_mode::@90 mode
The type of this lex mode.
uint8_t breakpoints[11]
This is the character set that should be used to delimit the tokens within the list.
Definition parser.h:159
This struct represents an abstract linked list that provides common functionality.
Definition pm_list.h:46
struct pm_list_node * next
A pointer to the next node in the list.
Definition pm_list.h:48
This represents the overall linked list.
Definition pm_list.h:55
pm_list_node_t * head
A pointer to the head of the list.
Definition pm_list.h:60
LocalVariableAndWriteNode.
Definition ast.h:3210
LocalVariableOperatorWriteNode.
Definition ast.h:3237
LocalVariableOrWriteNode.
Definition ast.h:3267
LocalVariableReadNode.
Definition ast.h:3294
uint32_t depth
LocalVariableReadNode#depth.
Definition ast.h:3302
pm_constant_id_t name
LocalVariableReadNode#name.
Definition ast.h:3299
LocalVariableTargetNode.
Definition ast.h:3312
LocalVariableWriteNode.
Definition ast.h:3330
This represents a range of bytes in the source string to which a node or token corresponds.
Definition ast.h:543
const uint8_t * start
A pointer to the start location of the range in the source.
Definition ast.h:545
const uint8_t * end
A pointer to the end location of the range in the source.
Definition ast.h:548
This is a node in the linked list of magic comments that we've found while parsing.
Definition parser.h:404
MatchLastLineNode.
Definition ast.h:3369
MatchPredicateNode.
Definition ast.h:3393
MatchRequiredNode.
Definition ast.h:3414
MatchWriteNode.
Definition ast.h:3435
MissingNode.
Definition ast.h:3453
ModuleNode.
Definition ast.h:3465
MultiTargetNode.
Definition ast.h:3495
pm_node_t base
The embedded base node.
Definition ast.h:3497
pm_location_t lparen_loc
MultiTargetNode#lparen_loc.
Definition ast.h:3509
struct pm_node_list lefts
MultiTargetNode#lefts.
Definition ast.h:3500
struct pm_node * rest
MultiTargetNode#rest.
Definition ast.h:3503
pm_location_t rparen_loc
MultiTargetNode#rparen_loc.
Definition ast.h:3512
struct pm_node_list rights
MultiTargetNode#rights.
Definition ast.h:3506
MultiWriteNode.
Definition ast.h:3522
NextNode.
Definition ast.h:3555
NilNode.
Definition ast.h:3573
NoKeywordsParameterNode.
Definition ast.h:3585
A list of nodes in the source, most often used for lists of children.
Definition ast.h:556
size_t size
The number of nodes in the list.
Definition ast.h:558
struct pm_node ** nodes
The nodes in the list.
Definition ast.h:564
This is the base structure that represents a node in the syntax tree.
Definition ast.h:1061
pm_node_type_t type
This represents the type of the node.
Definition ast.h:1066
pm_node_flags_t flags
This represents any flags on the node.
Definition ast.h:1072
pm_location_t location
This is the location of the node in the source.
Definition ast.h:1078
NumberedParametersNode.
Definition ast.h:3603
NumberedReferenceReadNode.
Definition ast.h:3618
OptionalKeywordParameterNode.
Definition ast.h:3633
OptionalParameterNode.
Definition ast.h:3654
A scope of locals surrounding the code that is being parsed.
Definition options.h:19
size_t locals_count
The number of locals in the scope.
Definition options.h:21
The options that can be passed to the parser.
Definition options.h:30
size_t scopes_count
The number of scopes surrounding the code that is being parsed.
Definition options.h:49
bool suppress_warnings
Whether or not we should suppress warnings.
Definition options.h:66
pm_string_t encoding
The name of the encoding that the source file is in.
Definition options.h:44
bool frozen_string_literal
Whether or not the frozen string literal option has been set.
Definition options.h:59
int32_t line
The line within the file that the parse starts on.
Definition options.h:38
pm_string_t filepath
The name of the file that is currently being parsed.
Definition options.h:32
OrNode.
Definition ast.h:3678
struct pm_node * left
OrNode#left.
Definition ast.h:3683
struct pm_node * right
OrNode#right.
Definition ast.h:3686
ParametersNode.
Definition ast.h:3699
struct pm_node * rest
ParametersNode#rest.
Definition ast.h:3710
struct pm_node_list requireds
ParametersNode#requireds.
Definition ast.h:3704
struct pm_block_parameter_node * block
ParametersNode#block.
Definition ast.h:3722
struct pm_node_list optionals
ParametersNode#optionals.
Definition ast.h:3707
struct pm_node_list posts
ParametersNode#posts.
Definition ast.h:3713
pm_node_t base
The embedded base node.
Definition ast.h:3701
struct pm_node * keyword_rest
ParametersNode#keyword_rest.
Definition ast.h:3719
struct pm_node_list keywords
ParametersNode#keywords.
Definition ast.h:3716
ParenthesesNode.
Definition ast.h:3732
struct pm_node * body
ParenthesesNode#body.
Definition ast.h:3737
This struct represents the overall parser.
Definition parser.h:489
const pm_encoding_t * explicit_encoding
When a string-like expression is being lexed, any byte or escape sequence that resolves to a value wh...
Definition parser.h:669
pm_lex_state_t lex_state
The current state of the lexer.
Definition parser.h:491
const pm_encoding_t * encoding
The encoding functions for the current file are attached to the parser as it's parsing so that it can ...
Definition parser.h:584
bool pattern_matching_newlines
This flag indicates that we are currently parsing a pattern matching expression and impacts that calc...
Definition parser.h:688
const uint8_t * end
The pointer to the end of the source.
Definition parser.h:536
bool recovering
Whether or not we're currently recovering from a syntax error.
Definition parser.h:675
pm_node_flags_t integer_base
We want to add a flag to integer nodes that indicates their base.
Definition parser.h:626
pm_constant_pool_t constant_pool
This constant pool keeps all of the constants defined throughout the file so that we can reference th...
Definition parser.h:615
bool in_keyword_arg
This flag indicates that we are currently parsing a keyword argument.
Definition parser.h:691
bool suppress_warnings
Whether or not we should suppress warnings.
Definition parser.h:713
const uint8_t * next_start
This is a special field set on the parser when we need the parser to jump to a specific location when...
Definition parser.h:549
pm_string_t filepath_string
This is the path of the file being parsed.
Definition parser.h:609
pm_list_t magic_comment_list
The list of magic comments that have been found while parsing.
Definition parser.h:563
int lambda_enclosure_nesting
Used to temporarily track the nesting of enclosures to determine if a { is the beginning of a lambda ...
Definition parser.h:500
pm_lex_callback_t * lex_callback
This is an optional callback that can be attached to the parser that will be called whenever a new to...
Definition parser.h:603
pm_token_t previous
The previous token we were considering.
Definition parser.h:539
pm_string_t current_string
This string is used to pass information from the lexer to the parser.
Definition parser.h:632
bool encoding_changed
Whether or not the encoding has been changed by a magic comment.
Definition parser.h:682
pm_location_t data_loc
The optional location of the __END__ keyword and its contents.
Definition parser.h:566
pm_context_node_t * current_context
The current parsing context.
Definition parser.h:578
pm_constant_id_t current_param_name
The constant ID of the parameter name whose default value is currently being parsed.
Definition parser.h:694
const uint8_t * start
The pointer to the start of the source.
Definition parser.h:533
int enclosure_nesting
Tracks the current nesting of (), [], and {}.
Definition parser.h:494
pm_list_t error_list
The list of errors that have been found while parsing.
Definition parser.h:572
const uint8_t * encoding_comment_start
This pointer indicates where a comment must start if it is to be considered an encoding comment.
Definition parser.h:597
pm_lex_mode_t stack[PM_LEX_STACK_SIZE]
The stack of lexer modes.
Definition parser.h:526
pm_list_t warning_list
The list of warnings that have been found while parsing.
Definition parser.h:569
const uint8_t * heredoc_end
This field indicates the end of a heredoc whose identifier was found on the current line.
Definition parser.h:557
int brace_nesting
Used to track the nesting of braces to ensure we get the correct value when we are interpolating bloc...
Definition parser.h:506
pm_encoding_changed_callback_t encoding_changed_callback
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition parser.h:591
int32_t start_line
The line number at the start of the parse.
Definition parser.h:638
struct pm_parser::@96 lex_modes
A stack of lex modes.
pm_lex_mode_t * current
The current mode of the lexer.
Definition parser.h:523
pm_list_t comment_list
The list of comments that have been found while parsing.
Definition parser.h:560
size_t index
The current index into the lexer mode stack.
Definition parser.h:529
pm_scope_t * current_scope
The current local scope.
Definition parser.h:575
bool command_start
Whether or not we're at the beginning of a command.
Definition parser.h:672
bool frozen_string_literal
Whether or not we have found a frozen_string_literal magic comment with a true value.
Definition parser.h:706
pm_newline_list_t newline_list
This is the list of newline offsets in the source file.
Definition parser.h:618
bool semantic_token_seen
Whether or not the parser has seen a token that has semantic meaning (i.e., a token that is not a com...
Definition parser.h:700
PinnedExpressionNode.
Definition ast.h:3753
PinnedVariableNode.
Definition ast.h:3777
PostExecutionNode.
Definition ast.h:3795
PreExecutionNode.
Definition ast.h:3819
ProgramNode.
Definition ast.h:3843
RangeNode.
Definition ast.h:3863
struct pm_node * right
RangeNode#right.
Definition ast.h:3871
struct pm_node * left
RangeNode#left.
Definition ast.h:3868
RationalNode.
Definition ast.h:3884
RedoNode.
Definition ast.h:3899
RegularExpressionNode.
Definition ast.h:3923
RequiredKeywordParameterNode.
Definition ast.h:3947
RequiredParameterNode.
Definition ast.h:3965
RescueModifierNode.
Definition ast.h:3980
RescueNode.
Definition ast.h:4001
struct pm_rescue_node * consequent
RescueNode#consequent.
Definition ast.h:4021
pm_location_t operator_loc
RescueNode#operator_loc.
Definition ast.h:4012
struct pm_node * reference
RescueNode#reference.
Definition ast.h:4015
struct pm_node_list exceptions
RescueNode#exceptions.
Definition ast.h:4009
struct pm_statements_node * statements
RescueNode#statements.
Definition ast.h:4018
pm_node_t base
The embedded base node.
Definition ast.h:4003
RestParameterNode.
Definition ast.h:4031
RetryNode.
Definition ast.h:4052
ReturnNode.
Definition ast.h:4064
This struct represents a node in a linked list of scopes.
Definition parser.h:454
struct pm_scope * previous
A pointer to the previous scope in the linked list.
Definition parser.h:459
pm_constant_id_list_t locals
The IDs of the locals in the given scope.
Definition parser.h:456
bool explicit_params
A boolean indicating whether or not this scope has explicit parameters.
Definition parser.h:472
bool closed
A boolean indicating whether or not this scope can see into its parent.
Definition parser.h:465
uint8_t numbered_parameters
An integer indicating the number of numbered parameters on this scope.
Definition parser.h:480
SelfNode.
Definition ast.h:4082
SingletonClassNode.
Definition ast.h:4094
SourceEncodingNode.
Definition ast.h:4124
SourceFileNode.
Definition ast.h:4136
SourceLineNode.
Definition ast.h:4151
SplatNode.
Definition ast.h:4163
struct pm_node * expression
SplatNode#expression.
Definition ast.h:4171
StatementsNode.
Definition ast.h:4181
struct pm_node_list body
StatementsNode#body.
Definition ast.h:4186
pm_node_t base
The embedded base node.
Definition ast.h:4183
A list of strings.
pm_string_t * strings
A pointer to the start of the string list.
size_t length
The length of the string list.
StringNode.
Definition ast.h:4200
pm_node_t base
The embedded base node.
Definition ast.h:4202
pm_string_t unescaped
StringNode#unescaped.
Definition ast.h:4214
pm_location_t content_loc
StringNode#content_loc.
Definition ast.h:4208
pm_location_t closing_loc
StringNode#closing_loc.
Definition ast.h:4211
pm_location_t opening_loc
StringNode#opening_loc.
Definition ast.h:4205
A generic string type that can have various ownership semantics.
Definition pm_string.h:30
const uint8_t * source
A pointer to the start of the string.
Definition pm_string.h:32
enum pm_string_t::@97 type
The type of the string.
size_t length
The length of the string in bytes of memory.
Definition pm_string.h:35
SuperNode.
Definition ast.h:4224
SymbolNode.
Definition ast.h:4255
pm_location_t opening_loc
SymbolNode#opening_loc.
Definition ast.h:4260
pm_location_t value_loc
SymbolNode#value_loc.
Definition ast.h:4263
pm_location_t closing_loc
SymbolNode#closing_loc.
Definition ast.h:4266
pm_string_t unescaped
SymbolNode#unescaped.
Definition ast.h:4269
pm_node_t base
The embedded base node.
Definition ast.h:4257
When we're lexing certain types (strings, symbols, lists, etc.) we have string content associated wit...
Definition prism.c:7960
pm_buffer_t buffer
The buffer that we're using to keep track of the string content.
Definition prism.c:7965
const uint8_t * cursor
The cursor into the source string that points to how far we have currently copied into the buffer.
Definition prism.c:7971
This struct represents a token in the Ruby source.
Definition ast.h:528
const uint8_t * end
A pointer to the end location of the token in the source.
Definition ast.h:536
const uint8_t * start
A pointer to the start location of the token in the source.
Definition ast.h:533
pm_token_type_t type
The type of the token.
Definition ast.h:530
TrueNode.
Definition ast.h:4279
UndefNode.
Definition ast.h:4291
pm_node_t base
The embedded base node.
Definition ast.h:4293
struct pm_node_list names
UndefNode#names.
Definition ast.h:4296
UnlessNode.
Definition ast.h:4309
struct pm_else_node * consequent
UnlessNode#consequent.
Definition ast.h:4326
pm_location_t end_keyword_loc
UnlessNode#end_keyword_loc.
Definition ast.h:4329
pm_node_t base
The embedded base node.
Definition ast.h:4311
struct pm_statements_node * statements
UnlessNode#statements.
Definition ast.h:4323
UntilNode.
Definition ast.h:4341
WhenNode.
Definition ast.h:4365
struct pm_statements_node * statements
WhenNode#statements.
Definition ast.h:4376
pm_node_t base
The embedded base node.
Definition ast.h:4367
struct pm_node_list conditions
WhenNode#conditions.
Definition ast.h:4373
WhileNode.
Definition ast.h:4388
XStringNode.
Definition ast.h:4415
YieldNode.
Definition ast.h:4439