Soar Kernel  9.3.2 08-06-12
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
lexer.h
Go to the documentation of this file.
1 /*************************************************************************
2  * PLEASE SEE THE FILE "license.txt" (INCLUDED WITH THIS SOFTWARE PACKAGE)
3  * FOR LICENSE AND COPYRIGHT INFORMATION.
4  *************************************************************************/
5 
6 /* ======================================================================
7  lexer.h
8 
9  The lexer reads files and returns a stream of lexemes. Get_lexeme() is
10  the main routine; it looks for the next lexeme in the input, and stores
11  it in the global variable "lexeme". See the structure definition below.
12 
13  Restrictions: the lexer cannot read individual input lines longer than
14  MAX_LEXER_LINE_LENGTH characters. Thus, a single lexeme can't be longer
15  than that either.
16 
17  The lexer maintains a stack of files being read, in order to handle nested
18  loads. Start_lex_from_file() and stop_lex_from_file() push and pop the
19  stack. Immediately after start_lex_from_file(), the current lexeme (global
20  variable) is undefined. Immediately after stop_lex_from_file(), the
21  current lexeme is automatically restored to whatever it was just before
22  the corresponding start_lex_from_file() call.
23 
24  Determine_possible_symbol_types_for_string() is a utility routine which
25  figures out what kind(s) of symbol a given string could represent.
26 
27  Print_location_of_most_recent_lexeme() is used to print an indication
28  of where a parser error occurred. It tries to print out the current
29  source line with a pointer to where the error was detected.
30 
31  Current_lexer_parentheses_level() returns the current level of parentheses
32  nesting (0 means no open parens have been encountered).
33  Skip_ahead_to_balanced_parentheses() eats lexemes until the appropriate
34  closing paren is found (a parentheses_level of 0 means eat until back at the top level).
35 
36  Fake_rparen_at_next_end_of_line() tells the lexer to insert a fake
37  R_PAREN_LEXEME token the next time it reaches the end of a line.
38 
39  Set_lexer_allow_ids() tells the lexer whether to allow identifiers to
40  be read. If FALSE, things that look like identifiers will be returned
41  as SYM_CONSTANT_LEXEME's instead.
42 ====================================================================== */
43 
44 #ifndef LEXER_H
45 #define LEXER_H
46 
47 #include <stdio.h> // Needed for FILE token below
48 
49 #ifdef __cplusplus
50 //extern "C"
51 //{
52 #endif
53 
/* Boolean type used by the lexer interface; it is a plain char. */
54 typedef char Bool;
/* Forward declaration only: struct agent_struct is not defined in this header,
   and the lexer routines declared below only take pointers to an agent. */
55 typedef struct agent_struct agent;
56 
/* Longest individual input line the lexer can read; a single lexeme can't be
   longer than this either. */
57 #define MAX_LEXER_LINE_LENGTH 1000
58 #define MAX_LEXEME_LENGTH (MAX_LEXER_LINE_LENGTH+5) /* a little bigger to avoid
59  any off-by-one errors */
60 
62  EOF_LEXEME, /* end-of-file */
63  IDENTIFIER_LEXEME, /* identifier */
64  VARIABLE_LEXEME, /* variable */
65  SYM_CONSTANT_LEXEME, /* symbolic constant */
66  INT_CONSTANT_LEXEME, /* integer constant */
67  FLOAT_CONSTANT_LEXEME, /* floating point constant */
68  L_PAREN_LEXEME, /* "(" */
69  R_PAREN_LEXEME, /* ")" */
70  L_BRACE_LEXEME, /* "{" */
71  R_BRACE_LEXEME, /* "}" */
72  PLUS_LEXEME, /* "+" */
73  MINUS_LEXEME, /* "-" */
74  RIGHT_ARROW_LEXEME, /* "-->" */
75  GREATER_LEXEME, /* ">" */
76  LESS_LEXEME, /* "<" */
77  EQUAL_LEXEME, /* "=" */
78  LESS_EQUAL_LEXEME, /* "<=" */
80  NOT_EQUAL_LEXEME, /* "<>" */
82  LESS_LESS_LEXEME, /* "<<" */
84  AMPERSAND_LEXEME, /* "&" */
85  AT_LEXEME, /* "@" */
86  TILDE_LEXEME, /* "~" */
87  UP_ARROW_LEXEME, /* "^" */
89  COMMA_LEXEME, /* "," */
90  PERIOD_LEXEME, /* "." */
91  QUOTED_STRING_LEXEME, /* string in double quotes */
92  DOLLAR_STRING_LEXEME, /* string for shell escape */
93  NULL_LEXEME }; /* Initial value */
94 
95 #define LENGTH_OF_LONGEST_SPECIAL_LEXEME 3 /* length of "-->" and "<=>" --
96  if a longer special lexeme is added, be
97  sure to update this! */
98 
/*
 * Describes a single lexeme: its token type, its text, and the decoded
 * value for integer, floating-point and identifier lexemes.
 *
 * NOTE(review): int64_t and uint64_t require <stdint.h>, but the only
 * #include visible in this header is <stdio.h> -- presumably every includer
 * pulls in the fixed-width types first; confirm, or include <stdint.h> here.
 */
99 struct lexeme_info {
100  enum lexer_token_type type; /* what kind of lexeme it is */
101  char string[MAX_LEXEME_LENGTH+1]; /* text of the lexeme (the +1 is presumably for a terminating null -- confirm) */
102  int length; /* length of the above string */
103  int64_t int_val; /* for INT_CONSTANT_LEXEME's */
104  double float_val; /* for FLOAT_CONSTANT_LEXEME's */
105  char id_letter; /* for IDENTIFIER_LEXEME's */
106  uint64_t id_number; /* for IDENTIFIER_LEXEME's */
107 };
108 
/*
 * Utility routine that figures out what kind(s) of symbol the string s
 * (whose length is passed as length_of_s) could represent.
 *
 * The possible_* pointers are presumably out-parameters, one per symbol
 * kind (identifier, variable, symbolic constant, integer constant,
 * floating-point constant) -- TODO confirm against the definition.
 * NOTE(review): the meaning of *rereadable is not described in this header.
 */
109 extern void determine_possible_symbol_types_for_string (char *s,
110  size_t length_of_s,
111  Bool *possible_id,
112  Bool *possible_var,
113  Bool *possible_sc,
114  Bool *possible_ic,
115  Bool *possible_fc,
116  Bool *rereadable);
117 
/* Presumably performs lexer setup for the given agent; the definition is
   not part of this header -- confirm. */
118 extern void init_lexer (agent* thisAgent);
/*
 * The lexer maintains a stack of files being read in order to handle nested
 * loads.  start_lex_from_file() pushes onto that stack; immediately after it
 * returns, the current lexeme is undefined.
 * NOTE(review): how filename and already_opened_file are used is not
 * described in this header.
 */
119 extern void start_lex_from_file (agent* thisAgent, const char *filename,
120  FILE *already_opened_file);
/* Pops the stack of files being read; the current lexeme is automatically
   restored to whatever it was just before the corresponding
   start_lex_from_file() call. */
121 extern void stop_lex_from_file (agent* thisAgent);
122 
/* Main lexer routine: looks for the next lexeme in the input and stores it
   in the "lexeme" variable mentioned in the comment at the top of this file. */
123 extern void get_lexeme (agent* thisAgent);
/* Prints an indication of where a parser error occurred; it tries to print
   the current source line with a pointer to where the error was detected. */
124 extern void print_location_of_most_recent_lexeme (agent* thisAgent);
125 
/* Returns the current level of parentheses nesting (0 means no open parens
   have been encountered). */
126 extern int current_lexer_parentheses_level (agent* thisAgent);
/* Eats lexemes until the appropriate closing paren is found; a
   parentheses_level of 0 means eat until back at the top level. */
127 extern void skip_ahead_to_balanced_parentheses (agent* thisAgent,
128  int parentheses_level);
/* Tells the lexer to insert a fake R_PAREN_LEXEME token the next time it
   reaches the end of a line. */
129 extern void fake_rparen_at_next_end_of_line (agent* thisAgent);
/* Tells the lexer whether to allow identifiers to be read.  If FALSE, things
   that look like identifiers are returned as SYM_CONSTANT_LEXEME's instead. */
130 extern void set_lexer_allow_ids (agent* thisAgent, Bool allow_identifiers);
/* Presumably returns the setting last given to set_lexer_allow_ids(); this
   getter is not described in the comment at the top of the file -- confirm. */
131 extern Bool get_lexer_allow_ids (agent* thisAgent);
132 
135 /* (RBD) the rest of this stuff shouldn't be in the module interface... */
136 
/* Size of the per-file input line buffer (the buffer member of
   lexer_source_file_struct): one maximum-length line plus terminators. */
137 #define BUFSIZE (MAX_LEXER_LINE_LENGTH+2) /* +2 for newline and null at end */
139 /* --- we'll use one of these structures for each file being read --- */
141 typedef struct lexer_source_file_struct {
143  char *filename;
144  FILE *file;
147  int parentheses_level; /* 0 means top level, no left paren's seen */
148  int current_column; /* column number of next char to read (0-based) */
149  uint64_t current_line; /* line number of line in buffer (1-based) */
150  int column_of_start_of_last_lexeme; /* (used for error messages) */
152  char buffer[BUFSIZE]; /* holds text of current input line */
153  struct lexeme_info saved_lexeme; /* save/restore it during nested loads */
154  int saved_current_char; /* save/restore this too */
156 
157 #ifdef __cplusplus
158 //}
159 #endif
160 
161 #endif // LEXER_H