Soar Kernel  9.3.2 08-06-12
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
Data Structures | Macros | Typedefs | Enumerations | Functions
lexer.h File Reference
#include <stdio.h>

Go to the source code of this file.

Data Structures

struct  lexeme_info
struct  lexer_source_file_struct

Macros

#define BUFSIZE   (MAX_LEXER_LINE_LENGTH+2) /* +2 for newline and null at end */
#define LENGTH_OF_LONGEST_SPECIAL_LEXEME
#define MAX_LEXEME_LENGTH
#define MAX_LEXER_LINE_LENGTH   1000

Typedefs

typedef struct agent_struct agent
typedef char Bool
typedef struct lexer_source_file_struct lexer_source_file

Enumerations

enum  lexer_token_type

Functions

int current_lexer_parentheses_level (agent *thisAgent)
void determine_possible_symbol_types_for_string (char *s, size_t length_of_s, Bool *possible_id, Bool *possible_var, Bool *possible_sc, Bool *possible_ic, Bool *possible_fc, Bool *rereadable)
Bool determine_type_of_constituent_string (agent *thisAgent)
void fake_rparen_at_next_end_of_line (agent *thisAgent)
void get_lexeme (agent *thisAgent)
Bool get_lexer_allow_ids (agent *thisAgent)
void init_lexer (agent *thisAgent)
void print_location_of_most_recent_lexeme (agent *thisAgent)
void set_lexer_allow_ids (agent *thisAgent, Bool allow_identifiers)
void skip_ahead_to_balanced_parentheses (agent *thisAgent, int parentheses_level)
void start_lex_from_file (agent *thisAgent, const char *filename, FILE *already_opened_file)
void stop_lex_from_file (agent *thisAgent)

Macro Definition Documentation

#define BUFSIZE   (MAX_LEXER_LINE_LENGTH+2) /* +2 for newline and null at end */

Definition at line 134 of file lexer.h.

Referenced by get_next_char().

#define LENGTH_OF_LONGEST_SPECIAL_LEXEME
Value:
3 /* length of "-->" and "<=>"--
if a longer one is added, be
sure to update this! */

Definition at line 94 of file lexer.h.

Referenced by determine_possible_symbol_types_for_string().

#define MAX_LEXEME_LENGTH
#define MAX_LEXER_LINE_LENGTH   1000

Definition at line 57 of file lexer.h.

Referenced by get_next_char().

Typedef Documentation

typedef struct agent_struct agent

Definition at line 55 of file lexer.h.

typedef char Bool

Definition at line 54 of file lexer.h.

Enumeration Type Documentation

Enumerator:
EOF_LEXEME 
IDENTIFIER_LEXEME 
VARIABLE_LEXEME 
SYM_CONSTANT_LEXEME 
INT_CONSTANT_LEXEME 
FLOAT_CONSTANT_LEXEME 
L_PAREN_LEXEME 
R_PAREN_LEXEME 
L_BRACE_LEXEME 
R_BRACE_LEXEME 
PLUS_LEXEME 
MINUS_LEXEME 
RIGHT_ARROW_LEXEME 
GREATER_LEXEME 
LESS_LEXEME 
EQUAL_LEXEME 
LESS_EQUAL_LEXEME 
GREATER_EQUAL_LEXEME 
NOT_EQUAL_LEXEME 
LESS_EQUAL_GREATER_LEXEME 
LESS_LESS_LEXEME 
GREATER_GREATER_LEXEME 
AMPERSAND_LEXEME 
AT_LEXEME 
TILDE_LEXEME 
UP_ARROW_LEXEME 
EXCLAMATION_POINT_LEXEME 
COMMA_LEXEME 
PERIOD_LEXEME 
QUOTED_STRING_LEXEME 
DOLLAR_STRING_LEXEME 
NULL_LEXEME 

Definition at line 60 of file lexer.h.

{
  /* Token kinds produced by the lexer, in declaration order. */
  EOF_LEXEME,                 /* end-of-file */
  IDENTIFIER_LEXEME,          /* identifier */
  VARIABLE_LEXEME,            /* variable */
  SYM_CONSTANT_LEXEME,        /* symbolic constant */
  INT_CONSTANT_LEXEME,        /* integer constant */
  FLOAT_CONSTANT_LEXEME,      /* floating point constant */
  L_PAREN_LEXEME,             /* "(" */
  R_PAREN_LEXEME,             /* ")" */
  L_BRACE_LEXEME,             /* "{" */
  R_BRACE_LEXEME,             /* "}" */
  PLUS_LEXEME,                /* "+" */
  MINUS_LEXEME,               /* "-" */
  RIGHT_ARROW_LEXEME,         /* "-->" */
  GREATER_LEXEME,             /* ">" */
  LESS_LEXEME,                /* "<" */
  EQUAL_LEXEME,               /* "=" */
  LESS_EQUAL_LEXEME,          /* "<=" */
  GREATER_EQUAL_LEXEME,       /* ">=" */
  NOT_EQUAL_LEXEME,           /* "<>" */
  LESS_EQUAL_GREATER_LEXEME,  /* "<=>" */
  LESS_LESS_LEXEME,           /* "<<" */
  GREATER_GREATER_LEXEME,     /* ">>" */
  AMPERSAND_LEXEME,           /* "&" */
  AT_LEXEME,                  /* "@" */
  TILDE_LEXEME,               /* "~" */
  UP_ARROW_LEXEME,            /* "^" */
  EXCLAMATION_POINT_LEXEME,   /* "!" */
  COMMA_LEXEME,               /* "," */
  PERIOD_LEXEME,              /* "." */
  QUOTED_STRING_LEXEME,       /* string in double quotes */
  DOLLAR_STRING_LEXEME,       /* string for shell escape */
  NULL_LEXEME                 /* presumably the "no lexeme yet" value -- confirm against lexer.h */
};

Function Documentation

int current_lexer_parentheses_level ( agent * thisAgent)

Definition at line 1078 of file lexer.cpp.

References agent_struct::current_file, and lexer_source_file_struct::parentheses_level.

{
/* Return the parentheses_level field recorded on the agent's current lexer source file. */
return thisAgent->current_file->parentheses_level;
}
void determine_possible_symbol_types_for_string ( char *  s,
size_t  length_of_s,
Bool *  possible_id,
Bool *  possible_var,
Bool *  possible_sc,
Bool *  possible_ic,
Bool *  possible_fc,
Bool *  rereadable 
)

Definition at line 1125 of file lexer.cpp.

References constituent_char, FALSE, LENGTH_OF_LONGEST_SPECIAL_LEXEME, number_starters, and TRUE.

Referenced by determine_type_of_constituent_string(), get_io_symbol_from_tio_constituent_string(), and symbol_to_string().

{
  /* Classify the NUL-terminated string s (whose length is length_of_s):
     each output flag is set to TRUE when s could be read as that kind of
     symbol.  All flags start FALSE.  If s contains any non-constituent
     character, only the integer/float flags can end up TRUE. */
  char *ch;
  Bool all_alphanum;

  *possible_id = FALSE;
  *possible_var = FALSE;
  *possible_sc = FALSE;
  *possible_ic = FALSE;
  *possible_fc = FALSE;
  *rereadable = FALSE;

  /* --- check if it's an integer or floating point number --- */
  if (number_starters[static_cast<unsigned char>(*s)]) {
    ch = s;
    if ((*ch=='+')||(*ch=='-'))
      ch++;                             /* optional leading + or - */
    while (isdigit(*ch))
      ch++;                             /* string of digits */
    /* the previous-character test rejects a lone sign such as "+" */
    if ((*ch==0)&&(isdigit(*(ch-1))))
      *possible_ic = TRUE;
    if (*ch=='.') {
      ch++;                             /* decimal point */
      while (isdigit(*ch))
        ch++;                           /* string of digits */
      if ((*ch=='e')||(*ch=='E')) {
        ch++;                           /* E */
        if ((*ch=='+')||(*ch=='-'))
          ch++;                         /* optional leading + or - */
        while (isdigit(*ch))
          ch++;                         /* string of digits */
      }
      if (*ch==0)
        *possible_fc = TRUE;
    }
  }

  /* --- make sure it's entirely constituent characters --- */
  for (ch=s; *ch!=0; ch++)
    if (! constituent_char[static_cast<unsigned char>(*ch)])
      return;

  /* --- check for rereadability --- */
  all_alphanum = TRUE;
  for (ch=s; *ch!='\0'; ch++) {
    if (!isalnum(*ch)) {
      all_alphanum = FALSE;
      break;
    }
  }
  /* anything longer than the longest special lexeme cannot be mistaken
     for one of them */
  if ( all_alphanum ||
       (length_of_s > LENGTH_OF_LONGEST_SPECIAL_LEXEME) ||
       ((length_of_s==1)&&(*s=='*')) )
  {
    *rereadable = TRUE;
  }

  /* --- any string of constituents could be a sym constant --- */
  *possible_sc = TRUE;

  /* --- check whether it's a variable --- */
  if ((*s=='<')&&(*(s+length_of_s-1)=='>'))
    *possible_var = TRUE;

  /* --- check if it's an identifier --- */
  // long term identifiers start with @
  if (*s == '@') {
    ch = s+1;
  } else {
    ch = s;
  }
  /* one letter followed by at least one more character, all digits */
  if (isalpha(*ch) && *(++ch) != '\0') {
    /* --- is the rest of the string an integer? --- */
    while (isdigit(*ch))
      ch++;
    if (*ch=='\0')
      *possible_id = TRUE;
  }
}
Bool determine_type_of_constituent_string ( agent * thisAgent)

Definition at line 307 of file lexer.cpp.

References lexer_source_file_struct::allow_ids, agent_struct::current_file, determine_possible_symbol_types_for_string(), FLOAT_CONSTANT_LEXEME, lexeme_info::float_val, lexeme_info::id_letter, lexeme_info::id_number, IDENTIFIER_LEXEME, INT_CONSTANT_LEXEME, lexeme_info::int_val, lexeme_info::length, agent_struct::lexeme, print(), print_location_of_most_recent_lexeme(), PRINT_WARNINGS_SYSPARAM, QUOTED_STRING_LEXEME, lexeme_info::string, SYM_CONSTANT_LEXEME, agent_struct::sysparams, TRUE, lexeme_info::type, VARIABLE_LEXEME, and xml_generate_warning().

Referenced by get_lexeme_from_string(), lex_ampersand(), lex_constituent_string(), lex_digit(), lex_equal(), lex_greater(), lex_less(), lex_minus(), lex_period(), and lex_plus().

{
  /* Decide which lexeme type the string currently held in
     thisAgent->lexeme represents, store the type (and any parsed value) in
     thisAgent->lexeme, and return TRUE on success or FALSE when a numeric
     conversion fails. */
  Bool possible_id, possible_var, possible_sc, possible_ic, possible_fc;
  Bool rereadable;

  determine_possible_symbol_types_for_string (thisAgent->lexeme.string,
                                              thisAgent->lexeme.length,
                                              &possible_id,
                                              &possible_var,
                                              &possible_sc,
                                              &possible_ic,
                                              &possible_fc,
                                              &rereadable);

  /* --- check whether it's a variable --- */
  if (possible_var) {
    thisAgent->lexeme.type = VARIABLE_LEXEME;
    return TRUE;
  }

  /* --- check whether it's an integer --- */
  if (possible_ic) {
    errno = 0;   /* strtol reports range errors only through errno */
    thisAgent->lexeme.type = INT_CONSTANT_LEXEME;
    thisAgent->lexeme.int_val = strtol (thisAgent->lexeme.string,NULL,10);
    if (errno) {
      print (thisAgent, "Error: bad integer (probably too large)\n");
      print_location_of_most_recent_lexeme(thisAgent);
      thisAgent->lexeme.int_val = 0;
    }
    return (errno == 0);
  }

  /* --- check whether it's a floating point number --- */
  if (possible_fc) {
    errno = 0;
    thisAgent->lexeme.type = FLOAT_CONSTANT_LEXEME;
    thisAgent->lexeme.float_val = strtod (thisAgent->lexeme.string,NULL);
    if (errno) {
      print (thisAgent, "Error: bad floating point number\n");
      print_location_of_most_recent_lexeme(thisAgent);
      thisAgent->lexeme.float_val = 0.0;
    }
    return (errno == 0);
  }

  /* --- check if it's an identifier (only when the source allows them) --- */
  if (thisAgent->current_file->allow_ids && possible_id) {
    // long term identifiers start with @
    unsigned lti_index = 0;
    if (thisAgent->lexeme.string[lti_index] == '@') {
      lti_index += 1;
    }
    thisAgent->lexeme.id_letter = static_cast<char>(toupper(thisAgent->lexeme.string[lti_index]));
    lti_index += 1;
    errno = 0;
    thisAgent->lexeme.type = IDENTIFIER_LEXEME;
    if (!from_c_string(thisAgent->lexeme.id_number, &(thisAgent->lexeme.string[lti_index]))) {
      print (thisAgent, "Error: bad number for identifier (probably too large)\n");
      print_location_of_most_recent_lexeme(thisAgent);
      thisAgent->lexeme.id_number = 0;
      errno = 1;   /* reuse errno as the failure flag for the return below */
    }
    return (errno == 0);
  }

  /* --- otherwise it must be a symbolic constant --- */
  if (possible_sc) {
    thisAgent->lexeme.type = SYM_CONSTANT_LEXEME;
    if (thisAgent->sysparams[PRINT_WARNINGS_SYSPARAM]) {
      /* a constant that starts with '<' or ends with '>' looks like a
         mistyped variable */
      if ( (thisAgent->lexeme.string[0] == '<') ||
           (thisAgent->lexeme.string[thisAgent->lexeme.length-1] == '>') )
      {
        print (thisAgent, "Warning: Suspicious string constant \"%s\"\n", thisAgent->lexeme.string);
        print_location_of_most_recent_lexeme(thisAgent);
        xml_generate_warning(thisAgent, "Warning: Suspicious string constant");
      }
    }
    return TRUE;
  }

  /* --- not a legal constituent string: treat it as a quoted string --- */
  thisAgent->lexeme.type = QUOTED_STRING_LEXEME;
  return TRUE;
}
void fake_rparen_at_next_end_of_line ( agent * thisAgent)
void get_lexeme ( agent * thisAgent)

Definition at line 747 of file lexer.cpp.

References agent_struct::current_char, agent_struct::current_file, do_fake_rparen(), lexer_source_file_struct::fake_rparen_at_eol, FALSE, get_next_char(), lexeme_info::length, agent_struct::lex_alias, lex_eof(), expansion_node::lexeme, agent_struct::lexeme, lexer_routines(), agent_struct::load_errors_quit, expansion_node::next, record_position_of_start_of_lexeme(), lexeme_info::string, and whitespace.

Referenced by lex_unknown(), parse_attr_value_make(), parse_attr_value_tests(), parse_cond(), parse_disjunction_test(), parse_function_call_after_lparen(), parse_head_of_conds_for_one_id(), parse_lti(), parse_preference_specifier_without_referent(), parse_preferences(), parse_preferences_soar8_non_operator(), parse_production(), parse_relational_test(), parse_rhs_action(), parse_rhs_value(), parse_tail_of_conds_for_one_id(), parse_test(), parse_value_test_star(), skip_ahead_to_balanced_parentheses(), smem_parse_chunk(), and smem_parse_chunks().

{
  /* Read the next lexeme into thisAgent->lexeme.  Pending alias lexemes
     are replayed first; otherwise whitespace, ';' and '#' comments are
     skipped and the routine registered for the current character is
     invoked (or lex_eof at end of input). */

  /* AGR 568 begin */
  if (thisAgent->lex_alias) {
    thisAgent->lexeme = thisAgent->lex_alias->lexeme;
    thisAgent->lex_alias = thisAgent->lex_alias->next;
    return;
  }
  /* AGR 568 end */

  thisAgent->lexeme.length = 0;
  thisAgent->lexeme.string[0] = 0;

  /* AGR 534 The only time a prompt should be printed out is if there's
     a command being expected; ie. the prompt shouldn't print out if we're
     in the middle of entering a production. So if we're in the middle of
     entering a production, then the parentheses level will be > 0, so that's
     the criteria we will use. AGR 5-Apr-94 */

  thisAgent->load_errors_quit = FALSE; /* AGR 527c */

  while (thisAgent->load_errors_quit==FALSE) { /* AGR 527c */
    if (thisAgent->current_char==EOF) break;
    if (whitespace[static_cast<unsigned char>(thisAgent->current_char)]) {
      if (thisAgent->current_char == '\n')
      {
        /* a requested fake ")" is emitted instead of consuming the newline */
        if (thisAgent->current_file->fake_rparen_at_eol) {
          do_fake_rparen(thisAgent);
          return;
        }
      }
      get_next_char(thisAgent);
      continue;
    }

//#ifdef USE_TCL
    if (thisAgent->current_char==';') {
      /* --- skip the semi-colon, forces newline in TCL --- */
      get_next_char(thisAgent); /* consume it */
      continue;
    }
    if (thisAgent->current_char=='#') {
      /* --- read from hash to end-of-line --- */
      while ((thisAgent->current_char!='\n') &&
             (thisAgent->current_char!=EOF))
        get_next_char(thisAgent);
      if (thisAgent->current_file->fake_rparen_at_eol) {
        do_fake_rparen(thisAgent);
        return;
      }
      if (thisAgent->current_char!=EOF) get_next_char(thisAgent);
      continue;
    }
//#else
// if (thisAgent->current_char==';') {
// /* --- read from semicolon to end-of-line --- */
// while ((thisAgent->current_char!='\n') &&
// (thisAgent->current_char!=EOF))
// get_next_char(thisAgent);
// if (thisAgent->current_file->fake_rparen_at_eol) {
// do_fake_rparen(thisAgent);
// return;
// }
// if (thisAgent->current_char!=EOF) get_next_char(thisAgent);
// continue;
// }
// if (thisAgent->current_char=='#') {
// /* --- comments surrounded by "#|" and "|#" delimiters --- */
// record_position_of_start_of_lexeme(); /* in case of later error mesg. */
// get_next_char(thisAgent);
// if (thisAgent->current_char!='|') {
// print ("Error: '#' not followed by '|'\n");
// print_location_of_most_recent_lexeme(thisAgent);
// continue;
// }
// get_next_char(thisAgent); /* consume the vbar */
// while (TRUE) {
// if (thisAgent->current_char==EOF) {
// print ("Error: '#|' without terminating '|#'\n");
// print_location_of_most_recent_lexeme(thisAgent);
// break;
// }
// if (thisAgent->current_char!='|') { get_next_char(thisAgent); continue; }
// get_next_char(thisAgent);
// if (thisAgent->current_char=='#') break;
// }
// get_next_char(thisAgent); /* consume the closing '#' */
// continue; /* continue outer while(TRUE), reading more whitespace */
// }
//#endif /* USE_TCL */
    break; /* if no whitespace or comments found, break out of the loop */
  }

  /* --- no more whitespace, so go get the actual lexeme --- */
  /* remember where it starts so later error messages can point at it */
  record_position_of_start_of_lexeme(thisAgent);
  if (thisAgent->current_char!=EOF)
    (*(lexer_routines[static_cast<unsigned char>(thisAgent->current_char)]))(thisAgent);
  else
    lex_eof(thisAgent);
}
Bool get_lexer_allow_ids ( agent * thisAgent)

Definition at line 1109 of file lexer.cpp.

References lexer_source_file_struct::allow_ids, and agent_struct::current_file.

Referenced by parse_lti().

{
/* Return the allow_ids flag of the agent's current lexer source file. */
return thisAgent->current_file->allow_ids;
}
void init_lexer ( agent * thisAgent)

Definition at line 861 of file lexer.cpp.

References constituent_char, lex_ampersand(), lex_at(), lex_comma(), lex_constituent_string(), lex_digit(), lex_dollar(), lex_equal(), lex_exclamation_point(), lex_greater(), lex_lbrace(), lex_less(), lex_lparen(), lex_minus(), lex_period(), lex_plus(), lex_quote(), lex_rbrace(), lex_rparen(), lex_tilde(), lex_up_arrow(), lex_vbar(), lexer_routines(), number_starters, start_lex_from_file(), TRUE, and whitespace.

Referenced by init_soar_agent().

{
  /* Fill the shared character-classification tables (constituent_char,
     whitespace, number_starters) and the per-character lexer_routines
     dispatch table once, then start lexing from standard input for this
     agent. */
  static bool initialized = false;

  if(!initialized)
  {
    initialized = true;

    unsigned int i;

    /* --- setup constituent_char array --- */
    char extra_constituents[] = "$%&*+-/:<=>?_@";
    for (i=0; i<256; i++)
    {
      //
      // When i == 0, strchr returns true based on the terminating
      // character. This is not the intent, so we exclude that case
      // here.
      //
      if((strchr(extra_constituents, i) != 0) && i != 0)
      {
        constituent_char[i]=TRUE;
      }
      else
      {
        constituent_char[i] = (isalnum(i) != 0);
      }
    }

    // for (i=0; i<strlen(extra_constituents); i++)
    // {
    // constituent_char[(int)extra_constituents[i]]=TRUE;
    // }

    /* --- setup whitespace array --- */
    for (i=0; i<256; i++)
    {
      whitespace[i] = (isspace(i) != 0);
    }

    /* --- setup number_starters array --- */
    for (i=0; i<256; i++)
    {
      switch(i)
      {
      case '+':
        number_starters[(int)'+']=TRUE;
        break;
      case '-':
        number_starters[(int)'-']=TRUE;
        break;
      case '.':
        number_starters[(int)'.']=TRUE;
        break;
      default:
        number_starters[i] = (isdigit(i) != 0);
      }
    }

    /* --- setup lexer_routines array --- */
    //
    // I go to some effort here to insure that values do not
    // get overwritten. That could cause problems in a multi-
    // threaded sense because values could get switched to one
    // value and then another. If a value is only ever set to
    // one thing, resetting it to the same thing should be
    // perfectly safe.
    //
    for (i=0; i<256; i++)
    {
      switch(i)
      {
      case '@':
        lexer_routines[(int)'@'] = lex_at;
        break;
      case '(':
        lexer_routines[(int)'('] = lex_lparen;
        break;
      case ')':
        lexer_routines[(int)')'] = lex_rparen;
        break;
      case '+':
        lexer_routines[(int)'+'] = lex_plus;
        break;
      case '-':
        lexer_routines[(int)'-'] = lex_minus;
        break;
      case '~':
        lexer_routines[(int)'~'] = lex_tilde;
        break;
      case '^':
        lexer_routines[(int)'^'] = lex_up_arrow;
        break;
      case '{':
        lexer_routines[(int)'{'] = lex_lbrace;
        break;
      case '}':
        lexer_routines[(int)'}'] = lex_rbrace;
        break;
      case '!':
        lexer_routines[(int)'!'] = lex_exclamation_point;
        break;
      case '>':
        lexer_routines[(int)'>'] = lex_greater;
        break;
      case '<':
        lexer_routines[(int)'<'] = lex_less;
        break;
      case '=':
        lexer_routines[(int)'='] = lex_equal;
        break;
      case '&':
        lexer_routines[(int)'&'] = lex_ampersand;
        break;
      case '|':
        lexer_routines[(int)'|'] = lex_vbar;
        break;
      case ',':
        lexer_routines[(int)','] = lex_comma;
        break;
      case '.':
        lexer_routines[(int)'.'] = lex_period;
        break;
      case '"':
        lexer_routines[(int)'"'] = lex_quote;
        break;
      case '$':
        lexer_routines[(int)'$'] = lex_dollar; /* AGR 562 */
        break;
      default:
        /* digits take precedence over the general constituent routine */
        if (isdigit(i))
        {
          lexer_routines[i] = lex_digit;
          continue;
        }

        if (constituent_char[i])
        {
          lexer_routines[i] = lex_constituent_string;
          continue;
        }
      }
    }
  }

  /* --- initially we're reading from the standard input --- */
  start_lex_from_file (thisAgent, "[standard input]", stdin);
}
void print_location_of_most_recent_lexeme ( agent * thisAgent)

Definition at line 1022 of file lexer.cpp.

References lexer_source_file_struct::buffer, lexer_source_file_struct::column_of_start_of_last_lexeme, agent_struct::current_char, agent_struct::current_file, lexer_source_file_struct::current_line, lexer_source_file_struct::filename, lexer_source_file_struct::line_of_start_of_last_lexeme, agent_struct::load_errors_quit, print(), print_string(), and reading_from_top_level().

Referenced by determine_type_of_constituent_string(), lex_quote(), lex_vbar(), parse_attr_value_make(), parse_attr_value_tests(), parse_cond(), parse_disjunction_test(), parse_function_call_after_lparen(), parse_head_of_conds_for_one_id(), parse_preferences_soar8_non_operator(), parse_production(), parse_relational_test(), parse_rhs_action(), parse_rhs_value(), and read_identifier_or_context_variable().

{
  /* Print where the most recently read lexeme started.  If it started on
     the line still held in the buffer, echo that line with a caret under
     the lexeme; otherwise print only file, line and column.  When not
     reading from the top level and load_errors_quit is set, force the
     current character to EOF. */
  int i;

  if (thisAgent->current_file->line_of_start_of_last_lexeme ==
      thisAgent->current_file->current_line) {
    /* --- error occurred on current line, so print out the line --- */
    if (! reading_from_top_level(thisAgent)) {
      print (thisAgent, "File %s, line %lu:\n", thisAgent->current_file->filename,
             thisAgent->current_file->current_line);
      /* respond_to_load_errors (); AGR 527a */
    }
    /* make sure exactly one newline follows the echoed line */
    if (thisAgent->current_file->buffer[strlen(thisAgent->current_file->buffer)-1]=='\n')
      print_string (thisAgent, thisAgent->current_file->buffer);
    else
      print (thisAgent, "%s\n",thisAgent->current_file->buffer);
    /* one dash per column before the lexeme, then the caret */
    for (i=0; i<thisAgent->current_file->column_of_start_of_last_lexeme; i++)
      print_string (thisAgent, "-");
    print_string (thisAgent, "^\n");

    if (! reading_from_top_level(thisAgent)) {
      //respond_to_load_errors (thisAgent); /* AGR 527a */
      if (thisAgent->load_errors_quit)
        thisAgent->current_char = EOF;
    }

    /* AGR 527a The respond_to_load_errors call came too early (above),
       and the "continue" prompt appeared before the offending line was printed
       out, so the respond_to_load_errors call was moved here.
       AGR 26-Apr-94 */

  } else {
    /* --- error occurred on a previous line, so just give the position --- */
    /* NOTE(review): the +1 presumably turns the stored zero-based column
       into a one-based one for display -- confirm against lexer.cpp */
    print (thisAgent, "File %s, line %lu, column %lu.\n", thisAgent->current_file->filename,
           thisAgent->current_file->line_of_start_of_last_lexeme,
           thisAgent->current_file->column_of_start_of_last_lexeme + 1);
    if (! reading_from_top_level(thisAgent)) {
      //respond_to_load_errors (thisAgent);
      if (thisAgent->load_errors_quit)
        thisAgent->current_char = EOF;
    }
  }
}
void set_lexer_allow_ids ( agent * thisAgent,
Bool  allow_identifiers 
)

Definition at line 1105 of file lexer.cpp.

References lexer_source_file_struct::allow_ids, and agent_struct::current_file.

Referenced by parse_lti(), and smem_parse_chunks().

{
/* Store allow_identifiers in the allow_ids flag of the agent's current lexer source file. */
thisAgent->current_file->allow_ids = allow_identifiers;
}
void skip_ahead_to_balanced_parentheses ( agent * thisAgent,
int  parentheses_level 
)

Definition at line 1082 of file lexer.cpp.

References agent_struct::current_file, EOF_LEXEME, get_lexeme(), agent_struct::lexeme, lexer_source_file_struct::parentheses_level, R_PAREN_LEXEME, TRUE, and lexeme_info::type.

{
/* Keep reading lexemes until the current one is a right parenthesis while the
   current file's parentheses_level equals the requested level, or until the
   current lexeme is end-of-file.  The current lexeme is tested before any
   new one is read. */
while (TRUE) {
/* --- nothing left to skip --- */
if (thisAgent->lexeme.type==EOF_LEXEME) return;
/* --- stop on a ")" seen at the requested nesting level --- */
if ((thisAgent->lexeme.type==R_PAREN_LEXEME) &&
(parentheses_level==thisAgent->current_file->parentheses_level)) return;
get_lexeme(thisAgent);
}
}
void start_lex_from_file ( agent * thisAgent,
const char *  filename,
FILE *  already_opened_file 
)

Definition at line 94 of file lexer.cpp.

References allocate_memory(), lexer_source_file_struct::allow_ids, lexer_source_file_struct::buffer, lexer_source_file_struct::column_of_start_of_last_lexeme, agent_struct::current_char, lexer_source_file_struct::current_column, agent_struct::current_file, lexer_source_file_struct::current_line, lexer_source_file_struct::fake_rparen_at_eol, FALSE, lexer_source_file_struct::file, lexer_source_file_struct::filename, agent_struct::lexeme, lexer_source_file_struct::line_of_start_of_last_lexeme, make_memory_block_for_string(), MISCELLANEOUS_MEM_USAGE, lexer_source_file_struct::parent_file, lexer_source_file_struct::parentheses_level, lexer_source_file_struct::saved_current_char, lexer_source_file_struct::saved_lexeme, and TRUE.

Referenced by init_lexer(), and load_file().

{
  /* Push a new lexer source record for an already-opened file onto the
     agent's stack of source files.  The lexeme and current character in
     effect are saved in the new record, whose parent_file links to the
     previous source. */
  lexer_source_file *lsf;

  lsf = static_cast<lexer_source_file_struct *>(allocate_memory (thisAgent, sizeof(lexer_source_file),
                                                                 MISCELLANEOUS_MEM_USAGE));
  lsf->saved_lexeme = thisAgent->lexeme;
  lsf->saved_current_char = thisAgent->current_char;
  lsf->parent_file = thisAgent->current_file;
  thisAgent->current_file = lsf;
  lsf->filename = make_memory_block_for_string (thisAgent, filename);
  lsf->file = already_opened_file;
  lsf->fake_rparen_at_eol = FALSE;
  lsf->allow_ids = TRUE;
  lsf->parentheses_level = 0;
  lsf->column_of_start_of_last_lexeme = 0;
  lsf->line_of_start_of_last_lexeme = 0;
  lsf->current_line = 0;
  lsf->current_column = 0;
  lsf->buffer[0] = 0;
  thisAgent->current_char = ' '; /* whitespace--to force immediate read of first line */
}
void stop_lex_from_file ( agent thisAgent)