/*
 * Soar Kernel 9.3.2 08-06-12
 * Navigation: Main Page | Data Structures | Files | File List | Globals
 * Index: All, Data Structures, Namespaces, Files, Functions, Variables,
 *        Typedefs, Enumerations, Enumerator, Friends, Macros
 * File: src/lexer.h
 * (Source listing; go to the documentation of this file for the rendered view.)
 */
/*************************************************************************
 * PLEASE SEE THE FILE "license.txt" (INCLUDED WITH THIS SOFTWARE PACKAGE)
 * FOR LICENSE AND COPYRIGHT INFORMATION.
 *************************************************************************/

/* ======================================================================
                             lexer.h

  The lexer reads files and returns a stream of lexemes.  Get_lexeme() is
  the main routine; it looks for the next lexeme in the input, and stores
  it in the global variable "lexeme".  See the structure definition below.

  Restrictions:  the lexer cannot read individual input lines longer than
  MAX_LEXER_LINE_LENGTH characters.  Thus, a single lexeme can't be longer
  than that either.

  The lexer maintains a stack of files being read, in order to handle nested
  loads.  Start_lex_from_file() and stop_lex_from_file() push and pop the
  stack.  Immediately after start_lex_from_file(), the current lexeme (global
  variable) is undefined.  Immediately after stop_lex_from_file(), the
  current lexeme is automatically restored to whatever it was just before
  the corresponding start_lex_from_file() call.

  Determine_possible_symbol_types_for_string() is a utility routine which
  figures out what kind(s) of symbol a given string could represent.

  Print_location_of_most_recent_lexeme() is used to print an indication
  of where a parser error occurred.  It tries to print out the current
  source line with a pointer to where the error was detected.

  Current_lexer_parentheses_level() returns the current level of parentheses
  nesting (0 means no open parens have been encountered).
  Skip_ahead_to_balanced_parentheses() eats lexemes until the appropriate
  closing paren is found (0 means eat until back at the top level).

  Fake_rparen_at_next_end_of_line() tells the lexer to insert a fake
  R_PAREN_LEXEME token the next time it reaches the end of a line.

  Set_lexer_allow_ids() tells the lexer whether to allow identifiers to
  be read.  If FALSE, things that look like identifiers will be returned
  as SYM_CONSTANT_LEXEME's instead.
====================================================================== */

#ifndef LEXER_H
#define LEXER_H

#include <stdint.h> // Needed for int64_t / uint64_t below
#include <stdio.h> // Needed for FILE token below

#ifdef __cplusplus
//extern "C"
//{
#endif

/* Boolean flag type used throughout this interface (stored in a char). */
typedef char Bool;

/* Forward declaration: only pointers to the agent are needed in this header,
   so the full definition of agent_struct is not required here. */
typedef struct agent_struct agent;

/* Longest individual input line the lexer can read. */
#define MAX_LEXER_LINE_LENGTH 1000

/* Longest lexeme: a little bigger than a line to avoid any
   off-by-one errors. */
#define MAX_LEXEME_LENGTH (MAX_LEXER_LINE_LENGTH+5)

/* Kinds of lexeme the lexer can produce.  The enumerators rely on implicit
   numbering starting at 0, so their order must not be changed. */
enum lexer_token_type {
  EOF_LEXEME,                        /* end-of-file */
  IDENTIFIER_LEXEME,                 /* identifier */
  VARIABLE_LEXEME,                   /* variable */
  SYM_CONSTANT_LEXEME,               /* symbolic constant */
  INT_CONSTANT_LEXEME,               /* integer constant */
  FLOAT_CONSTANT_LEXEME,             /* floating point constant */
  L_PAREN_LEXEME,                    /* "(" */
  R_PAREN_LEXEME,                    /* ")" */
  L_BRACE_LEXEME,                    /* "{" */
  R_BRACE_LEXEME,                    /* "}" */
  PLUS_LEXEME,                       /* "+" */
  MINUS_LEXEME,                      /* "-" */
  RIGHT_ARROW_LEXEME,                /* "-->" */
  GREATER_LEXEME,                    /* ">" */
  LESS_LEXEME,                       /* "<" */
  EQUAL_LEXEME,                      /* "=" */
  LESS_EQUAL_LEXEME,                 /* "<=" */
  GREATER_EQUAL_LEXEME,              /* ">=" */
  NOT_EQUAL_LEXEME,                  /* "<>" */
  LESS_EQUAL_GREATER_LEXEME,         /* "<=>" */
  LESS_LESS_LEXEME,                  /* "<<" */
  GREATER_GREATER_LEXEME,            /* ">>" */
  AMPERSAND_LEXEME,                  /* "&" */
  AT_LEXEME,                         /* "@" */
  TILDE_LEXEME,                      /* "~" */
  UP_ARROW_LEXEME,                   /* "^" */
  EXCLAMATION_POINT_LEXEME,          /* "!" */
  COMMA_LEXEME,                      /* "," */
  PERIOD_LEXEME,                     /* "." */
  QUOTED_STRING_LEXEME,              /* string in double quotes */
  DOLLAR_STRING_LEXEME,              /* string for shell escape */
  NULL_LEXEME                        /* Initial value */
};

/* Length of "-->" and "<=>" -- if a longer special lexeme is added,
   be sure to update this! */
#define LENGTH_OF_LONGEST_SPECIAL_LEXEME 3

99
struct
lexeme_info
{
100
enum
lexer_token_type
type
;
/* what kind of lexeme it is */
101
char
string
[
MAX_LEXEME_LENGTH
+1];
/* text of the lexeme */
102
int
length
;
/* length of the above string */
103
int64_t
int_val
;
/* for INT_CONSTANT_LEXEME's */
104
double
float_val
;
/* for FLOAT_CONSTANT_LEXEME's */
105
char
id_letter
;
/* for IDENTIFIER_LEXEME's */
106
uint64_t
id_number
;
/* for IDENTIFIER_LEXEME's */
107
};
108
109
extern
void
determine_possible_symbol_types_for_string
(
char
*s,
110
size_t
length_of_s,
111
Bool
*possible_id,
112
Bool
*possible_var,
113
Bool
*possible_sc,
114
Bool
*possible_ic,
115
Bool
*possible_fc,
116
Bool
*rereadable);
117
118
extern
void
init_lexer
(
agent
* thisAgent);
119
extern
void
start_lex_from_file
(
agent
* thisAgent,
const
char
*filename,
120
FILE *already_opened_file);
121
extern
void
stop_lex_from_file
(
agent
* thisAgent);
122
123
extern
void
get_lexeme
(
agent
* thisAgent);
124
extern
void
print_location_of_most_recent_lexeme
(
agent
* thisAgent);
125
126
extern
int
current_lexer_parentheses_level
(
agent
* thisAgent);
127
extern
void
skip_ahead_to_balanced_parentheses
(
agent
* thisAgent,
128
int
parentheses_level);
129
extern
void
fake_rparen_at_next_end_of_line
(
agent
* thisAgent);
130
extern
void
set_lexer_allow_ids
(
agent
* thisAgent,
Bool
allow_identifiers);
131
extern
Bool
get_lexer_allow_ids
(
agent
* thisAgent);
132
133
extern
Bool
determine_type_of_constituent_string
(
agent
* thisAgent);
134
/* (RBD) the rest of this stuff shouldn't be in the module interface... */

/* Size of the per-file line buffer: the longest permitted input line,
   +2 for newline and null at end. */
#define BUFSIZE (MAX_LEXER_LINE_LENGTH+2)

139
/* --- we'll use one of these structures for each file being read --- */
140
141
typedef
struct
lexer_source_file_struct
{
142
struct
lexer_source_file_struct
*
parent_file
;
143
char
*
filename
;
144
FILE *
file
;
145
Bool
fake_rparen_at_eol
;
146
Bool
allow_ids
;
147
int
parentheses_level
;
/* 0 means top level, no left paren's seen */
148
int
current_column
;
/* column number of next char to read (0-based) */
149
uint64_t
current_line
;
/* line number of line in buffer (1-based) */
150
int
column_of_start_of_last_lexeme
;
/* (used for error messages) */
151
uint64_t
line_of_start_of_last_lexeme
;
152
char
buffer
[
BUFSIZE
];
/* holds text of current input line */
153
struct
lexeme_info
saved_lexeme
;
/* save/restore it during nested loads */
154
int
saved_current_char
;
/* save/restore this too */
155
}
lexer_source_file
;
156
/* The extern "C" wrapper is commented out, so this conditional is
   currently empty. */
#ifdef __cplusplus
//}
#endif

#endif // LEXER_H
/* Generated on Mon Aug 6 2012 17:21:02 for Soar Kernel by Doxygen 1.8.1.2 */