Soar Kernel  9.3.2 08-06-12
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
Macros | Functions | Variables
lexer.cpp File Reference
#include <portability.h>
#include <stdlib.h>
#include "lexer.h"
#include "mem.h"
#include "kernel.h"
#include "agent.h"
#include "print.h"
#include "init_soar.h"
#include "xml.h"
#include <math.h>
#include <ctype.h>
#include <assert.h>

Go to the source code of this file.

Macros

#define lu   lex_unknown

Functions

int current_lexer_parentheses_level (agent *thisAgent)
void determine_possible_symbol_types_for_string (char *s, size_t length_of_s, Bool *possible_id, Bool *possible_var, Bool *possible_sc, Bool *possible_ic, Bool *possible_fc, Bool *rereadable)
Bool determine_type_of_constituent_string (agent *thisAgent)
void do_fake_rparen (agent *thisAgent)
void fake_rparen_at_next_end_of_line (agent *thisAgent)
void finish (agent *thisAgent)
void get_lexeme (agent *thisAgent)
Bool get_lexer_allow_ids (agent *thisAgent)
void get_next_char (agent *thisAgent)
void init_lexer (agent *thisAgent)
void lex_ampersand (agent *thisAgent)
void lex_at (agent *thisAgent)
void lex_comma (agent *thisAgent)
void lex_constituent_string (agent *thisAgent)
void lex_digit (agent *thisAgent)
void lex_dollar (agent *thisAgent)
void lex_eof (agent *thisAgent)
void lex_equal (agent *thisAgent)
void lex_exclamation_point (agent *thisAgent)
void lex_greater (agent *thisAgent)
void lex_lbrace (agent *thisAgent)
void lex_less (agent *thisAgent)
void lex_lparen (agent *thisAgent)
void lex_minus (agent *thisAgent)
void lex_period (agent *thisAgent)
void lex_plus (agent *thisAgent)
void lex_quote (agent *thisAgent)
void lex_rbrace (agent *thisAgent)
void lex_rparen (agent *thisAgent)
void lex_tilde (agent *thisAgent)
void lex_unknown (agent *thisAgent)
void lex_up_arrow (agent *thisAgent)
void lex_vbar (agent *thisAgent)
void lexer_routines (agent *)
void print_location_of_most_recent_lexeme (agent *thisAgent)
void read_constituent_string (agent *thisAgent)
void read_rest_of_floating_point_number (agent *thisAgent)
void record_position_of_start_of_lexeme (agent *thisAgent)
void set_lexer_allow_ids (agent *thisAgent, Bool allow_identifiers)
void skip_ahead_to_balanced_parentheses (agent *thisAgent, int parentheses_level)
void start_lex_from_file (agent *thisAgent, const char *filename, FILE *already_opened_file)
void stop_lex_from_file (agent *thisAgent)
void store_and_advance (agent *thisAgent)

Variables

Bool constituent_char [256]
Bool number_starters [256]
Bool whitespace [256]

Macro Definition Documentation

#define lu   lex_unknown

Definition at line 406 of file lexer.cpp.

Function Documentation

int current_lexer_parentheses_level ( agent * thisAgent)

Definition at line 1078 of file lexer.cpp.

References agent_struct::current_file, and lexer_source_file_struct::parentheses_level.

{
/* Returns the parentheses_level field stored on the agent's current lexer source file. */
return thisAgent->current_file->parentheses_level;
}
void determine_possible_symbol_types_for_string ( char *  s,
size_t  length_of_s,
Bool * possible_id,
Bool * possible_var,
Bool * possible_sc,
Bool * possible_ic,
Bool * possible_fc,
Bool * rereadable 
)

Definition at line 1125 of file lexer.cpp.

References constituent_char, FALSE, LENGTH_OF_LONGEST_SPECIAL_LEXEME, number_starters, and TRUE.

Referenced by determine_type_of_constituent_string(), get_io_symbol_from_tio_constituent_string(), and symbol_to_string().

{
  /*
   * Decide which kinds of symbol the NUL-terminated string s could denote.
   * Every output flag is cleared first and then set to TRUE when s matches:
   *   possible_ic  - optional sign followed only by digits (at least one)
   *   possible_fc  - optional sign, digits, '.', digits, optional exponent
   *   possible_sc  - s consists solely of constituent characters
   *   possible_var - constituent string starting with '<' and ending with '>'
   *   possible_id  - optional '@', one letter, then one or more digits
   *   rereadable   - constituent string that is all alphanumeric, longer
   *                  than LENGTH_OF_LONGEST_SPECIAL_LEXEME, or exactly "*"
   * The numeric checks run before the constituent check, so possible_ic /
   * possible_fc may be TRUE for strings that are not pure constituents
   * (for which all the remaining flags stay FALSE).
   * length_of_s must be the length of s; it locates the final character.
   */
  char *ch;
  Bool all_alphanum;

  *possible_id = FALSE;
  *possible_var = FALSE;
  *possible_sc = FALSE;
  *possible_ic = FALSE;
  *possible_fc = FALSE;
  *rereadable = FALSE;

  /* --- check if it's an integer or floating point number --- */
  /* <ctype.h> classifiers require an unsigned-char value (or EOF), so every
     plain char is converted before being classified. */
  if (number_starters[static_cast<unsigned char>(*s)]) {
    ch = s;
    if ((*ch=='+')||(*ch=='-'))
      ch++;                                   /* optional leading + or - */
    while (isdigit(static_cast<unsigned char>(*ch)))
      ch++;                                   /* string of digits */
    if ((*ch==0)&&(isdigit(static_cast<unsigned char>(*(ch-1)))))
      *possible_ic = TRUE;
    if (*ch=='.') {
      ch++;                                   /* decimal point */
      while (isdigit(static_cast<unsigned char>(*ch)))
        ch++;                                 /* string of digits */
      if ((*ch=='e')||(*ch=='E')) {
        ch++;                                 /* E */
        if ((*ch=='+')||(*ch=='-'))
          ch++;                               /* optional leading + or - */
        while (isdigit(static_cast<unsigned char>(*ch)))
          ch++;                               /* string of digits */
      }
      if (*ch==0)
        *possible_fc = TRUE;
    }
  }

  /* --- make sure it's entirely constituent characters --- */
  for (ch=s; *ch!=0; ch++)
    if (! constituent_char[static_cast<unsigned char>(*ch)])
      return;

  /* --- check for rereadability --- */
  all_alphanum = TRUE;
  for (ch=s; *ch!='\0'; ch++) {
    if (!isalnum(static_cast<unsigned char>(*ch))) {
      all_alphanum = FALSE;
      break;
    }
  }
  /* Strings longer than any special lexeme cannot be mistaken for one. */
  if ( all_alphanum ||
       (length_of_s > LENGTH_OF_LONGEST_SPECIAL_LEXEME) ||
       ((length_of_s==1)&&(*s=='*')) )
  {
    *rereadable = TRUE;
  }

  /* --- any string of constituents could be a sym constant --- */
  *possible_sc = TRUE;

  /* --- check whether it's a variable --- */
  if ((*s=='<')&&(*(s+length_of_s-1)=='>'))
    *possible_var = TRUE;

  /* --- check if it's an identifier --- */
  // long term identifiers start with @
  if (*s == '@') {
    ch = s+1;
  } else {
    ch = s;
  }
  if (isalpha(static_cast<unsigned char>(*ch)) && *(++ch) != '\0') {
    /* --- is the rest of the string an integer? --- */
    while (isdigit(static_cast<unsigned char>(*ch)))
      ch++;
    if (*ch=='\0')
      *possible_id = TRUE;
  }
}
Bool determine_type_of_constituent_string ( agent * thisAgent)

Definition at line 307 of file lexer.cpp.

References lexer_source_file_struct::allow_ids, agent_struct::current_file, determine_possible_symbol_types_for_string(), FLOAT_CONSTANT_LEXEME, lexeme_info::float_val, lexeme_info::id_letter, lexeme_info::id_number, IDENTIFIER_LEXEME, INT_CONSTANT_LEXEME, lexeme_info::int_val, lexeme_info::length, agent_struct::lexeme, print(), print_location_of_most_recent_lexeme(), PRINT_WARNINGS_SYSPARAM, QUOTED_STRING_LEXEME, lexeme_info::string, SYM_CONSTANT_LEXEME, agent_struct::sysparams, TRUE, lexeme_info::type, VARIABLE_LEXEME, and xml_generate_warning().

Referenced by get_lexeme_from_string(), lex_ampersand(), lex_constituent_string(), lex_digit(), lex_equal(), lex_greater(), lex_less(), lex_minus(), lex_period(), and lex_plus().

{
  /*
   * Classify the constituent string currently held in thisAgent->lexeme and
   * fill in lexeme.type plus the matching value field.  Candidates are tried
   * in priority order: variable, integer, float, identifier (only when the
   * current file allows ids), symbolic constant, and finally quoted string.
   * Returns TRUE on success, or FALSE when a numeric conversion failed (the
   * value is then zeroed and an error message is printed).
   */
  Bool possible_id, possible_var, possible_sc, possible_ic, possible_fc;
  Bool rereadable;

  determine_possible_symbol_types_for_string (thisAgent->lexeme.string,
                                              thisAgent->lexeme.length,
                                              &possible_id,
                                              &possible_var,
                                              &possible_sc,
                                              &possible_ic,
                                              &possible_fc,
                                              &rereadable);

  if (possible_var) {
    thisAgent->lexeme.type = VARIABLE_LEXEME;
    return TRUE;
  }

  if (possible_ic) {
    errno = 0;   /* strtol reports range errors only through errno */
    thisAgent->lexeme.type = INT_CONSTANT_LEXEME;
    thisAgent->lexeme.int_val = strtol (thisAgent->lexeme.string,NULL,10);
    if (errno) {
      print (thisAgent, "Error: bad integer (probably too large)\n");
      print_location_of_most_recent_lexeme(thisAgent);
      thisAgent->lexeme.int_val = 0;
    }
    return (errno == 0);
  }

  if (possible_fc) {
    errno = 0;   /* strtod reports range errors only through errno */
    thisAgent->lexeme.type = FLOAT_CONSTANT_LEXEME;
    thisAgent->lexeme.float_val = strtod (thisAgent->lexeme.string,NULL);
    if (errno) {
      print (thisAgent, "Error: bad floating point number\n");
      print_location_of_most_recent_lexeme(thisAgent);
      thisAgent->lexeme.float_val = 0.0;
    }
    return (errno == 0);
  }

  if (thisAgent->current_file->allow_ids && possible_id) {
    // long term identifiers start with @
    unsigned lti_index = 0;
    if (thisAgent->lexeme.string[lti_index] == '@') {
      lti_index += 1;
    }
    thisAgent->lexeme.id_letter = static_cast<char>(
        toupper(static_cast<unsigned char>(thisAgent->lexeme.string[lti_index])));
    lti_index += 1;
    errno = 0;
    thisAgent->lexeme.type = IDENTIFIER_LEXEME;
    /* from_c_string() is defined elsewhere; a false result is treated as a
       failed conversion and signalled through errno like the cases above. */
    if (!from_c_string(thisAgent->lexeme.id_number, &(thisAgent->lexeme.string[lti_index]))) {
      print (thisAgent, "Error: bad number for identifier (probably too large)\n");
      print_location_of_most_recent_lexeme(thisAgent);
      thisAgent->lexeme.id_number = 0;
      errno = 1;
    }
    return (errno == 0);
  }

  if (possible_sc) {
    thisAgent->lexeme.type = SYM_CONSTANT_LEXEME;
    if (thisAgent->sysparams[PRINT_WARNINGS_SYSPARAM]) {
      /* A constant that begins with '<' or ends with '>' is probably a
         mistyped variable, so warn about it. */
      if ( (thisAgent->lexeme.string[0] == '<') ||
           (thisAgent->lexeme.string[thisAgent->lexeme.length-1] == '>') )
      {
        print (thisAgent, "Warning: Suspicious string constant \"%s\"\n", thisAgent->lexeme.string);
        print_location_of_most_recent_lexeme(thisAgent);
        xml_generate_warning(thisAgent, "Warning: Suspicious string constant");
      }
    }
    return TRUE;
  }

  /* Nothing else matched: treat the text as a quoted string. */
  thisAgent->lexeme.type = QUOTED_STRING_LEXEME;
  return TRUE;
}
void do_fake_rparen ( agent * thisAgent)
void fake_rparen_at_next_end_of_line ( agent * thisAgent)
void finish ( agent * thisAgent)
inline
void get_lexeme ( agent * thisAgent)

Definition at line 747 of file lexer.cpp.

References agent_struct::current_char, agent_struct::current_file, do_fake_rparen(), lexer_source_file_struct::fake_rparen_at_eol, FALSE, get_next_char(), lexeme_info::length, agent_struct::lex_alias, lex_eof(), expansion_node::lexeme, agent_struct::lexeme, lexer_routines(), agent_struct::load_errors_quit, expansion_node::next, record_position_of_start_of_lexeme(), lexeme_info::string, and whitespace.

Referenced by lex_unknown(), parse_attr_value_make(), parse_attr_value_tests(), parse_cond(), parse_disjunction_test(), parse_function_call_after_lparen(), parse_head_of_conds_for_one_id(), parse_lti(), parse_preference_specifier_without_referent(), parse_preferences(), parse_preferences_soar8_non_operator(), parse_production(), parse_relational_test(), parse_rhs_action(), parse_rhs_value(), parse_tail_of_conds_for_one_id(), parse_test(), parse_value_test_star(), skip_ahead_to_balanced_parentheses(), smem_parse_chunk(), and smem_parse_chunks().

{
  /*
   * Read the next lexeme into thisAgent->lexeme.
   * If an alias expansion is pending (lex_alias), its next lexeme is returned
   * instead of reading input.  Otherwise whitespace, ';' and '#' comments are
   * skipped, the start position of the lexeme is recorded, and the routine
   * registered in lexer_routines[] for the current character is invoked
   * (lex_eof() at end of input).  When the current file has
   * fake_rparen_at_eol set, reaching end of line yields a faked ')' instead.
   */

  /* AGR 568 begin */
  if (thisAgent->lex_alias) {
    thisAgent->lexeme = thisAgent->lex_alias->lexeme;
    thisAgent->lex_alias = thisAgent->lex_alias->next;
    return;
  }
  /* AGR 568 end */

  thisAgent->lexeme.length = 0;
  thisAgent->lexeme.string[0] = 0;

  /* AGR 534 The only time a prompt should be printed out is if there's
     a command being expected; ie. the prompt shouldn't print out if we're
     in the middle of entering a production. So if we're in the middle of
     entering a production, then the parentheses level will be > 0, so that's
     the criteria we will use. AGR 5-Apr-94 */

  thisAgent->load_errors_quit = FALSE; /* AGR 527c */

  while (thisAgent->load_errors_quit==FALSE) { /* AGR 527c */
    if (thisAgent->current_char==EOF) break;
    if (whitespace[static_cast<unsigned char>(thisAgent->current_char)]) {
      if (thisAgent->current_char == '\n')
      {
        if (thisAgent->current_file->fake_rparen_at_eol) {
          do_fake_rparen(thisAgent);
          return;
        }
      }
      get_next_char(thisAgent);
      continue;
    }

//#ifdef USE_TCL
    if (thisAgent->current_char==';') {
      /* --- skip the semi-colon, forces newline in TCL --- */
      get_next_char(thisAgent); /* consume it */
      continue;
    }
    if (thisAgent->current_char=='#') {
      /* --- read from hash to end-of-line --- */
      while ((thisAgent->current_char!='\n') &&
             (thisAgent->current_char!=EOF))
        get_next_char(thisAgent);
      if (thisAgent->current_file->fake_rparen_at_eol) {
        do_fake_rparen(thisAgent);
        return;
      }
      if (thisAgent->current_char!=EOF) get_next_char(thisAgent);
      continue;
    }
//#else
// if (thisAgent->current_char==';') {
// /* --- read from semicolon to end-of-line --- */
// while ((thisAgent->current_char!='\n') &&
// (thisAgent->current_char!=EOF))
// get_next_char(thisAgent);
// if (thisAgent->current_file->fake_rparen_at_eol) {
// do_fake_rparen(thisAgent);
// return;
// }
// if (thisAgent->current_char!=EOF) get_next_char(thisAgent);
// continue;
// }
// if (thisAgent->current_char=='#') {
// /* --- comments surrounded by "#|" and "|#" delimiters --- */
// record_position_of_start_of_lexeme(); /* in case of later error mesg. */
// get_next_char(thisAgent);
// if (thisAgent->current_char!='|') {
// print ("Error: '#' not followed by '|'\n");
// print_location_of_most_recent_lexeme(thisAgent);
// continue;
// }
// get_next_char(thisAgent); /* consume the vbar */
// while (TRUE) {
// if (thisAgent->current_char==EOF) {
// print ("Error: '#|' without terminating '|#'\n");
// print_location_of_most_recent_lexeme(thisAgent);
// break;
// }
// if (thisAgent->current_char!='|') { get_next_char(thisAgent); continue; }
// get_next_char(thisAgent);
// if (thisAgent->current_char=='#') break;
// }
// get_next_char(thisAgent); /* consume the closing '#' */
// continue; /* continue outer while(TRUE), reading more whitespace */
// }
//#endif /* USE_TCL */
    break; /* if no whitespace or comments found, break out of the loop */
  }

  /* --- no more whitespace, so go get the actual lexeme --- */
  /* Remember where the lexeme starts so later error messages can point at it. */
  record_position_of_start_of_lexeme(thisAgent);
  if (thisAgent->current_char!=EOF)
    (*(lexer_routines[static_cast<unsigned char>(thisAgent->current_char)]))(thisAgent);
  else
    lex_eof(thisAgent);
}
Bool get_lexer_allow_ids ( agent * thisAgent)

Definition at line 1109 of file lexer.cpp.

References lexer_source_file_struct::allow_ids, and agent_struct::current_file.

Referenced by parse_lti().

{
/* Returns the allow_ids flag stored on the agent's current lexer source file. */
return thisAgent->current_file->allow_ids;
}
void get_next_char ( agent * thisAgent)

Definition at line 140 of file lexer.cpp.

References abort_with_fatal_error(), agent_struct::alternate_input_exit, agent_struct::alternate_input_string, agent_struct::alternate_input_suffix, lexer_source_file_struct::buffer, BUFSIZE, agent_struct::current_char, lexer_source_file_struct::current_column, agent_struct::current_file, lexer_source_file_struct::current_line, lexer_source_file_struct::file, lexer_source_file_struct::filename, MAX_LEXER_LINE_LENGTH, NIL, print(), reading_from_top_level(), and tell_printer_that_output_column_has_been_reset().

Referenced by get_lexeme(), lex_dollar(), lex_quote(), lex_unknown(), lex_vbar(), and store_and_advance().

{
  /*
   * Advance thisAgent->current_char to the next input character.
   * Characters come, in order of preference, from alternate_input_string,
   * then alternate_input_suffix, then the current file's line buffer.  When
   * the buffer is exhausted (a NUL is read) a new line is fetched with
   * fgets(); at end of file or on a read error the buffer is emptied and
   * current_char becomes 0.  current_char is set to EOF only when the
   * alternate input is used up and alternate_input_exit is set.
   */
  char *s;

  if ( thisAgent->alternate_input_exit &&
       (thisAgent->alternate_input_string == NULL) &&
       (thisAgent->alternate_input_suffix == NULL) ) {
    thisAgent->current_char = EOF;
    //assert(0 && "error in lexer.cpp (control_c_handler() used to be called here)");
    return;
  }

  if (thisAgent->alternate_input_string != NULL)
  {
    thisAgent->current_char = *thisAgent->alternate_input_string++;
    if (thisAgent->current_char == '\0')
    {
      /* String exhausted: drop it so later calls do not read past its
         terminator, and continue with the suffix. */
      thisAgent->alternate_input_string = NIL;
      /* NOTE(review): assumes a suffix is always supplied together with an
         alternate input string - confirm against the callers. */
      thisAgent->current_char =
        *thisAgent->alternate_input_suffix++;
    }
  }
  else if (thisAgent->alternate_input_suffix != NULL)
  {
    thisAgent->current_char = *thisAgent->alternate_input_suffix++;
    if (thisAgent->current_char == '\0')
    {
      /* Suffix exhausted: drop it for the same reason as above. */
      thisAgent->alternate_input_suffix = NIL;
      if ( thisAgent->alternate_input_exit ) {
        thisAgent->current_char = EOF;
        //assert(0 && "error in lexer.cpp (control_c_handler() used to be called here)");
        return;
      }
      thisAgent->current_char = thisAgent->current_file->buffer
        [thisAgent->current_file->current_column++];
    }
  }
  else
  {
    thisAgent->current_char = thisAgent->current_file->buffer
      [thisAgent->current_file->current_column++];
  }

  if (thisAgent->current_char) return;

  /* A NUL was read, so the buffered line is used up.  If the buffer was
     filled completely without ending in a newline, the line was too long. */
  if ((thisAgent->current_file->current_column == BUFSIZE) &&
      (thisAgent->current_file->buffer[BUFSIZE-2] != '\n') &&
      (thisAgent->current_file->buffer[BUFSIZE-2] != EOF)) {
    char msg[512];
    SNPRINTF (msg, 512,
              "lexer.c: Error: line too long (max allowed is %d chars)\nFile %s, line %llu\n",
              MAX_LEXER_LINE_LENGTH, thisAgent->current_file->filename,
              static_cast<long long unsigned>(thisAgent->current_file->current_line));
    msg[511] = 0; /* ensure null termination */
    abort_with_fatal_error(thisAgent, msg);
  }

  s = fgets (thisAgent->current_file->buffer, BUFSIZE, thisAgent->current_file->file);

  if (s) {
    thisAgent->current_file->current_line++;
    if (reading_from_top_level(thisAgent)) {
      tell_printer_that_output_column_has_been_reset (thisAgent);
    }
  } else {
    /* s==NIL means immediate eof encountered or read error occurred */
    if (! feof(thisAgent->current_file->file)) {
      if(reading_from_top_level(thisAgent)) {
        assert(0 && "error in lexer.cpp (control_c_handler() used to be called here)");
        return;
      } else {
        print (thisAgent, "I/O error while reading file %s; ignoring the rest of it.\n",
               thisAgent->current_file->filename);
      }
    }
    thisAgent->current_file->buffer[0] = 0;
  }
  thisAgent->current_char = thisAgent->current_file->buffer[0];
  thisAgent->current_file->current_column = 1;
}
void init_lexer ( agent * thisAgent)

Definition at line 861 of file lexer.cpp.

References constituent_char, lex_ampersand(), lex_at(), lex_comma(), lex_constituent_string(), lex_digit(), lex_dollar(), lex_equal(), lex_exclamation_point(), lex_greater(), lex_lbrace(), lex_less(), lex_lparen(), lex_minus(), lex_period(), lex_plus(), lex_quote(), lex_rbrace(), lex_rparen(), lex_tilde(), lex_up_arrow(), lex_vbar(), lexer_routines(), number_starters, start_lex_from_file(), TRUE, and whitespace.

Referenced by init_soar_agent().

{
  /*
   * Fill the lexer lookup tables (constituent_char, whitespace,
   * number_starters, lexer_routines) the first time this is called, then
   * start lexing this agent from standard input.  The tables are guarded by
   * a function-local flag, so only the first call builds them.
   */
  static bool initialized = false;

  if(!initialized)
  {
    initialized = true;

    unsigned int i;

    /* --- setup constituent_char array --- */
    char extra_constituents[] = "$%&*+-/:<=>?_@";
    for (i=0; i<256; i++)
    {
      //
      // When i == 0, strchr returns true based on the terminating
      // character. This is not the intent, so we exclude that case
      // here.
      //
      if((strchr(extra_constituents, i) != 0) && i != 0)
      {
        constituent_char[i] = TRUE;
      }
      else
      {
        constituent_char[i] = (isalnum(i) != 0);
      }
    }

    // for (i=0; i<strlen(extra_constituents); i++)
    // {
    // constituent_char[(int)extra_constituents[i]]=TRUE;
    // }

    /* --- setup whitespace array --- */
    for (i=0; i<256; i++)
    {
      whitespace[i] = (isspace(i) != 0);
    }

    /* --- setup number_starters array --- */
    for (i=0; i<256; i++)
    {
      switch(i)
      {
      case '+':
      case '-':
      case '.':
        /* a sign or a decimal point may begin a number */
        number_starters[i] = TRUE;
        break;
      default:
        number_starters[i] = (isdigit(i) != 0);
      }
    }

    /* --- setup lexer_routines array --- */
    //
    // I go to some effort here to insure that values do not
    // get overwritten. That could cause problems in a multi-
    // threaded sense because values could get switched to one
    // value and then another. If a value is only ever set to
    // one thing, resetting it to the same thing should be
    // perfectly safe.
    //
    for (i=0; i<256; i++)
    {
      switch(i)
      {
      case '@':
        lexer_routines[(int)'@'] = lex_at;
        break;
      case '(':
        lexer_routines[(int)'('] = lex_lparen;
        break;
      case ')':
        lexer_routines[(int)')'] = lex_rparen;
        break;
      case '+':
        lexer_routines[(int)'+'] = lex_plus;
        break;
      case '-':
        lexer_routines[(int)'-'] = lex_minus;
        break;
      case '~':
        lexer_routines[(int)'~'] = lex_tilde;
        break;
      case '^':
        lexer_routines[(int)'^'] = lex_up_arrow;
        break;
      case '{':
        lexer_routines[(int)'{'] = lex_lbrace;
        break;
      case '}':
        lexer_routines[(int)'}'] = lex_rbrace;
        break;
      case '!':
        lexer_routines[(int)'!'] = lex_exclamation_point;
        break;
      case '>':
        lexer_routines[(int)'>'] = lex_greater;
        break;
      case '<':
        lexer_routines[(int)'<'] = lex_less;
        break;
      case '=':
        lexer_routines[(int)'='] = lex_equal;
        break;
      case '&':
        lexer_routines[(int)'&'] = lex_ampersand;
        break;
      case '|':
        lexer_routines[(int)'|'] = lex_vbar;
        break;
      case ',':
        lexer_routines[(int)','] = lex_comma;
        break;
      case '.':
        lexer_routines[(int)'.'] = lex_period;
        break;
      case '"':
        lexer_routines[(int)'"'] = lex_quote;
        break;
      case '$':
        lexer_routines[(int)'$'] = lex_dollar; /* AGR 562 */
        break;
      default:
        if (isdigit(i))
        {
          lexer_routines[i] = lex_digit;
          continue;
        }
        if (constituent_char[i])
        {
          lexer_routines[i] = lex_constituent_string;
          continue;
        }
        /* every other entry keeps whatever value the table was defined with */
      }
    }
  }

  /* --- initially we're reading from the standard input --- */
  start_lex_from_file (thisAgent, "[standard input]", stdin);
}
void lex_ampersand ( agent * thisAgent)

Definition at line 492 of file lexer.cpp.

References AMPERSAND_LEXEME, determine_type_of_constituent_string(), lexeme_info::length, agent_struct::lexeme, read_constituent_string(), and lexeme_info::type.

Referenced by init_lexer().

{
  /* Lexeme might be "&", or symbol */
  /* Note: this routine relies on & being a constituent character */

  /* Collect the whole run of constituent characters first; the length test
     below is only meaningful once the lexeme has been read. */
  read_constituent_string(thisAgent);
  if (thisAgent->lexeme.length==1) { thisAgent->lexeme.type = AMPERSAND_LEXEME; return; }
  /* Anything longer is classified as an ordinary constituent string. */
  determine_type_of_constituent_string(thisAgent);
}
void lex_at ( agent * thisAgent)

Definition at line 441 of file lexer.cpp.

References AT_LEXEME, finish(), agent_struct::lexeme, store_and_advance(), and lexeme_info::type.

Referenced by init_lexer().

{
/* Lexes the single-character '@' token (init_lexer dispatches '@' here).
   store_and_advance() and finish() are defined elsewhere; presumably they
   append the current character to the lexeme and terminate it - confirm. */
store_and_advance(thisAgent);
finish(thisAgent);
thisAgent->lexeme.type = AT_LEXEME;
}
void lex_comma ( agent * thisAgent)

Definition at line 477 of file lexer.cpp.

References COMMA_LEXEME, finish(), agent_struct::lexeme, store_and_advance(), and lexeme_info::type.

Referenced by init_lexer().

{
/* Lexes the single-character ',' token.
   store_and_advance() and finish() are defined elsewhere; presumably they
   append the current character to the lexeme and terminate it - confirm. */
store_and_advance(thisAgent);
finish(thisAgent);
thisAgent->lexeme.type = COMMA_LEXEME;
}
void lex_constituent_string ( agent * thisAgent)
void lex_digit ( agent * thisAgent)

Definition at line 599 of file lexer.cpp.

References agent_struct::current_char, determine_type_of_constituent_string(), FALSE, lexeme_info::length, agent_struct::lexeme, read_constituent_string(), read_rest_of_floating_point_number(), lexeme_info::string, and TRUE.

Referenced by init_lexer().

{
  /* Lexeme starts with a digit: it may be an integer, a floating-point
     number, or a symbol that merely begins with digits. */
  int i;
  Bool could_be_floating_point;

  read_constituent_string(thisAgent);

  /* --- if we stopped at '.', it might be a floating-point number, so be
     careful to check for this case --- */
  if (thisAgent->current_char=='.') {
    could_be_floating_point = TRUE;
    /* index 0 is the leading digit itself, so checking starts at 1 */
    for (i=1; i<thisAgent->lexeme.length; i++)
      if (! isdigit(static_cast<unsigned char>(thisAgent->lexeme.string[i])))
        could_be_floating_point = FALSE;
    if (could_be_floating_point) read_rest_of_floating_point_number(thisAgent);
  }

  /* Decide between integer, float and symbol from the collected text. */
  determine_type_of_constituent_string(thisAgent);
}
void lex_dollar ( agent * thisAgent)

Definition at line 714 of file lexer.cpp.

References agent_struct::current_char, DOLLAR_STRING_LEXEME, get_next_char(), lexeme_info::length, agent_struct::lexeme, MAX_LEXEME_LENGTH, lexeme_info::string, and lexeme_info::type.

Referenced by init_lexer().

{
  /* Lexes a '$' string: the '$' plus everything up to (not including) the
     end of the line or end of input, truncated so the terminator fits
     within MAX_LEXEME_LENGTH. */
  thisAgent->lexeme.type = DOLLAR_STRING_LEXEME;
  thisAgent->lexeme.string[0] = '$';
  thisAgent->lexeme.length = 1;
  get_next_char(thisAgent); /* consume the '$' */
  while ((thisAgent->current_char!='\n') &&
         (thisAgent->current_char!=EOF) &&
         (thisAgent->lexeme.length < MAX_LEXEME_LENGTH-1)) {
    thisAgent->lexeme.string[thisAgent->lexeme.length++] =
      char(thisAgent->current_char);
    get_next_char(thisAgent);
  }
  thisAgent->lexeme.string[thisAgent->lexeme.length] = '\0';
}
void lex_eof ( agent * thisAgent)
void lex_equal ( agent * thisAgent)

Definition at line 483 of file lexer.cpp.

References determine_type_of_constituent_string(), EQUAL_LEXEME, lexeme_info::length, agent_struct::lexeme, read_constituent_string(), and lexeme_info::type.

Referenced by init_lexer().

{
  /* Lexeme might be "=", or symbol */
  /* Note: this routine relies on = being a constituent character */

  /* Collect the whole run of constituent characters first; the length test
     below is only meaningful once the lexeme has been read. */
  read_constituent_string(thisAgent);
  if (thisAgent->lexeme.length==1) { thisAgent->lexeme.type = EQUAL_LEXEME; return; }
  /* Anything longer is classified as an ordinary constituent string. */
  determine_type_of_constituent_string(thisAgent);
}
void lex_exclamation_point ( agent * thisAgent)

Definition at line 471 of file lexer.cpp.

References EXCLAMATION_POINT_LEXEME, finish(), agent_struct::lexeme, store_and_advance(), and lexeme_info::type.

Referenced by init_lexer().

{
  /* Lexes the single-character '!' token, following the same pattern as the
     other one-character routines (lex_at, lex_comma, ...). */
  store_and_advance(thisAgent);
  finish(thisAgent);
  thisAgent->lexeme.type = EXCLAMATION_POINT_LEXEME;
}
void lex_greater ( agent * thisAgent)

Definition at line 515 of file lexer.cpp.

References determine_type_of_constituent_string(), GREATER_EQUAL_LEXEME, GREATER_GREATER_LEXEME, GREATER_LEXEME, lexeme_info::length, agent_struct::lexeme, read_constituent_string(), lexeme_info::string, and lexeme_info::type.

Referenced by init_lexer().

{
  /* Lexeme might be ">", ">=", ">>", or symbol */
  /* Note: this routine relies on =,> being constituent characters */

  /* Collect the whole run of constituent characters before inspecting it. */
  read_constituent_string(thisAgent);
  if (thisAgent->lexeme.length==1) { thisAgent->lexeme.type = GREATER_LEXEME; return; }
  if (thisAgent->lexeme.length==2) {
    if (thisAgent->lexeme.string[1]=='>') { thisAgent->lexeme.type = GREATER_GREATER_LEXEME; return;}
    if (thisAgent->lexeme.string[1]=='=') { thisAgent->lexeme.type = GREATER_EQUAL_LEXEME; return; }
  }
  /* Not one of the special operators: classify as a constituent string. */
  determine_type_of_constituent_string(thisAgent);
}
void lex_lbrace ( agent * thisAgent)

Definition at line 459 of file lexer.cpp.

References finish(), L_BRACE_LEXEME, agent_struct::lexeme, store_and_advance(), and lexeme_info::type.

Referenced by init_lexer().

{
/* Lexes the single-character '{' token.
   store_and_advance() and finish() are defined elsewhere; presumably they
   append the current character to the lexeme and terminate it - confirm. */
store_and_advance(thisAgent);
finish(thisAgent);
thisAgent->lexeme.type = L_BRACE_LEXEME;
}
void lex_less ( agent * thisAgent)

Definition at line 528 of file lexer.cpp.

References determine_type_of_constituent_string(), lexeme_info::length, LESS_EQUAL_GREATER_LEXEME, LESS_EQUAL_LEXEME, LESS_LESS_LEXEME, LESS_LEXEME, agent_struct::lexeme, NOT_EQUAL_LEXEME, read_constituent_string(), lexeme_info::string, and lexeme_info::type.

Referenced by init_lexer().

{
  /* Lexeme might be "<", "<=", "<=>", "<>", "<<", or variable */
  /* Note: this routine relies on =,<,> being constituent characters */

  /* Collect the whole run of constituent characters before inspecting it. */
  read_constituent_string(thisAgent);
  if (thisAgent->lexeme.length==1) { thisAgent->lexeme.type = LESS_LEXEME; return; }
  if (thisAgent->lexeme.length==2) {
    if (thisAgent->lexeme.string[1]=='>') { thisAgent->lexeme.type = NOT_EQUAL_LEXEME; return; }
    if (thisAgent->lexeme.string[1]=='=') { thisAgent->lexeme.type = LESS_EQUAL_LEXEME; return; }
    if (thisAgent->lexeme.string[1]=='<') { thisAgent->lexeme.type = LESS_LESS_LEXEME; return; }
  }
  if (thisAgent->lexeme.length==3) {
    if ((thisAgent->lexeme.string[1]=='=')&&(thisAgent->lexeme.string[2]=='>'))
      { thisAgent->lexeme.type = LESS_EQUAL_GREATER_LEXEME; return; }
  }
  /* Not one of the special operators: a variable or other constituent string. */
  determine_type_of_constituent_string(thisAgent);
}
void lex_lparen ( agent * thisAgent)
void lex_minus ( agent * thisAgent)

Definition at line 576 of file lexer.cpp.

References agent_struct::current_char, determine_type_of_constituent_string(), FALSE, lexeme_info::length, agent_struct::lexeme, MINUS_LEXEME, read_constituent_string(), read_rest_of_floating_point_number(), RIGHT_ARROW_LEXEME, lexeme_info::string, TRUE, and lexeme_info::type.

Referenced by init_lexer().

{
  /* Lexeme might be -, -->, number, or symbol */
  /* Note: this routine relies on various things being constituent chars */
  int i;
  Bool could_be_floating_point;

  read_constituent_string(thisAgent);

  /* --- if we stopped at '.', it might be a floating-point number, so be
     careful to check for this case --- */
  if (thisAgent->current_char=='.') {
    could_be_floating_point = TRUE;
    /* index 0 is the sign, so checking starts at 1 */
    for (i=1; i<thisAgent->lexeme.length; i++)
      if (! isdigit(static_cast<unsigned char>(thisAgent->lexeme.string[i])))
        could_be_floating_point = FALSE;
    if (could_be_floating_point) read_rest_of_floating_point_number(thisAgent);
  }
  if (thisAgent->lexeme.length==1) { thisAgent->lexeme.type = MINUS_LEXEME; return; }
  if (thisAgent->lexeme.length==3) {
    if ((thisAgent->lexeme.string[1]=='-')&&(thisAgent->lexeme.string[2]=='>'))
      { thisAgent->lexeme.type = RIGHT_ARROW_LEXEME; return; }
  }
  /* Neither "-" nor "-->": classify as number or symbol. */
  determine_type_of_constituent_string(thisAgent);
}
void lex_period ( agent * thisAgent)

Definition at line 547 of file lexer.cpp.

References agent_struct::current_char, determine_type_of_constituent_string(), finish(), lexeme_info::length, agent_struct::lexeme, PERIOD_LEXEME, read_rest_of_floating_point_number(), store_and_advance(), and lexeme_info::type.

Referenced by init_lexer().

{
  /* Lexeme might be ".", or a floating-point number that starts with '.' */
  store_and_advance(thisAgent);
  finish(thisAgent);
  /* --- if we stopped at '.', it might be a floating-point number, so be
     careful to check for this case --- */
  if (isdigit(thisAgent->current_char)) read_rest_of_floating_point_number(thisAgent);
  if (thisAgent->lexeme.length==1) { thisAgent->lexeme.type = PERIOD_LEXEME; return; }
  /* More than the bare '.' was read: classify the collected text. */
  determine_type_of_constituent_string(thisAgent);
}
void lex_plus ( agent * thisAgent)

Definition at line 557 of file lexer.cpp.

References agent_struct::current_char, determine_type_of_constituent_string(), FALSE, lexeme_info::length, agent_struct::lexeme, PLUS_LEXEME, read_constituent_string(), read_rest_of_floating_point_number(), lexeme_info::string, TRUE, and lexeme_info::type.

Referenced by init_lexer().

{
  /* Lexeme might be +, number, or symbol */
  /* Note: this routine relies on various things being constituent chars */
  int i;
  Bool could_be_floating_point;

  read_constituent_string(thisAgent);

  /* --- if we stopped at '.', it might be a floating-point number, so be
     careful to check for this case --- */
  if (thisAgent->current_char=='.') {
    could_be_floating_point = TRUE;
    /* index 0 is the sign, so checking starts at 1 */
    for (i=1; i<thisAgent->lexeme.length; i++)
      if (! isdigit(static_cast<unsigned char>(thisAgent->lexeme.string[i])))
        could_be_floating_point = FALSE;
    if (could_be_floating_point) read_rest_of_floating_point_number(thisAgent);
  }
  if (thisAgent->lexeme.length==1) { thisAgent->lexeme.type = PLUS_LEXEME; return; }
  /* More than the bare '+': classify as number or symbol. */
  determine_type_of_constituent_string(thisAgent);
}
void lex_quote ( agent * thisAgent)

Definition at line 669 of file lexer.cpp.

References agent_struct::current_char, EOF_LEXEME, get_next_char(), lexeme_info::length, agent_struct::lexeme, MAX_LEXEME_LENGTH, print(), print_location_of_most_recent_lexeme(), QUOTED_STRING_LEXEME, lexeme_info::string, TRUE, and lexeme_info::type.

Referenced by init_lexer().

{
  /*
   * Lexes a double-quoted string.  The quotes are not stored; a backslash
   * causes the following character to be stored literally.  If input ends
   * or the lexeme reaches MAX_LEXEME_LENGTH before the closing quote, an
   * error is printed and an EOF lexeme is produced instead.
   */
  thisAgent->lexeme.type = QUOTED_STRING_LEXEME;
  get_next_char(thisAgent);   /* consume the opening quote */
  do {
    if ((thisAgent->current_char==EOF)||(thisAgent->lexeme.length==MAX_LEXEME_LENGTH)) {
      print (thisAgent, "Error: opening '\"' without closing '\"'\n");
      print_location_of_most_recent_lexeme(thisAgent);
      /* BUGBUG if reading from top level, don't want to signal EOF */
      thisAgent->lexeme.type = EOF_LEXEME;
      /* Same one-character EOF lexeme that lex_vbar produces, so that
         length and string contents agree. */
      thisAgent->lexeme.string[0]=EOF;
      thisAgent->lexeme.string[1]=0;
      thisAgent->lexeme.length = 1;
      return;
    }
    if (thisAgent->current_char=='\\') {
      get_next_char(thisAgent);   /* skip the backslash, keep what follows */
      thisAgent->lexeme.string[thisAgent->lexeme.length++] = char(thisAgent->current_char);
      get_next_char(thisAgent);
    } else if (thisAgent->current_char=='"') {
      get_next_char(thisAgent);   /* consume the closing quote */
      break;
    } else {
      thisAgent->lexeme.string[thisAgent->lexeme.length++] = char(thisAgent->current_char);
      get_next_char(thisAgent);
    }
  } while(TRUE);
  thisAgent->lexeme.string[thisAgent->lexeme.length]=0;
}
void lex_rbrace ( agent * thisAgent)

Definition at line 465 of file lexer.cpp.

References finish(), agent_struct::lexeme, R_BRACE_LEXEME, store_and_advance(), and lexeme_info::type.

Referenced by init_lexer().

{
/* Lexes the single-character '}' token.
   store_and_advance() and finish() are defined elsewhere; presumably they
   append the current character to the lexeme and terminate it - confirm. */
store_and_advance(thisAgent);
finish(thisAgent);
thisAgent->lexeme.type = R_BRACE_LEXEME;
}
void lex_rparen ( agent * thisAgent)
void lex_tilde ( agent * thisAgent)

Definition at line 447 of file lexer.cpp.

References finish(), agent_struct::lexeme, store_and_advance(), TILDE_LEXEME, and lexeme_info::type.

Referenced by init_lexer().

{
/* Lexes the single-character '~' token.
   store_and_advance() and finish() are defined elsewhere; presumably they
   append the current character to the lexeme and terminate it - confirm. */
store_and_advance(thisAgent);
finish(thisAgent);
thisAgent->lexeme.type = TILDE_LEXEME;
}
void lex_unknown ( agent * thisAgent)

Definition at line 615 of file lexer.cpp.

References agent_struct::current_char, lexer_source_file_struct::current_column, agent_struct::current_file, lexer_source_file_struct::current_line, lexer_source_file_struct::filename, get_lexeme(), get_next_char(), agent_struct::load_errors_quit, print(), and reading_from_top_level().

{
  /*
   * Handles a character that no other lexer routine claims.  Unless it is a
   * NUL seen while reading from the top level, the character code and its
   * file position are reported.  The character is then skipped and lexing
   * resumes with the next lexeme.
   */
  if (!(reading_from_top_level(thisAgent) && thisAgent->current_char == 0)) {
    print (thisAgent, "Error: Unknown character encountered by lexer, code=%d\n",
           thisAgent->current_char);
    /* Cast so the arguments match the %lu conversions whatever integer
       types the line/column fields are declared with. */
    print (thisAgent, "File %s, line %lu, column %lu.\n", thisAgent->current_file->filename,
           static_cast<unsigned long>(thisAgent->current_file->current_line),
           static_cast<unsigned long>(thisAgent->current_file->current_column));
    if (! reading_from_top_level(thisAgent)) {
      //respond_to_load_errors (thisAgent);
      if (thisAgent->load_errors_quit)
        thisAgent->current_char = EOF;
    }
  }
  get_next_char(thisAgent);
  get_lexeme(thisAgent);
}
void lex_up_arrow ( agent * thisAgent)

Definition at line 453 of file lexer.cpp.

References finish(), agent_struct::lexeme, store_and_advance(), lexeme_info::type, and UP_ARROW_LEXEME.

Referenced by init_lexer().

{
/* Lexes the single-character '^' token.
   store_and_advance() and finish() are defined elsewhere; presumably they
   append the current character to the lexeme and terminate it - confirm. */
store_and_advance(thisAgent);
finish(thisAgent);
thisAgent->lexeme.type = UP_ARROW_LEXEME;
}
void lex_vbar ( agent * thisAgent)

Definition at line 639 of file lexer.cpp.

References agent_struct::current_char, EOF_LEXEME, get_next_char(), lexeme_info::length, agent_struct::lexeme, MAX_LEXEME_LENGTH, print(), print_location_of_most_recent_lexeme(), lexeme_info::string, SYM_CONSTANT_LEXEME, TRUE, and lexeme_info::type.

Referenced by init_lexer().

{
  /*
   * Lexes a |...| delimited symbolic constant.  The bars are not stored; a
   * backslash causes the following character to be stored literally.  If
   * input ends or the lexeme reaches MAX_LEXEME_LENGTH before the closing
   * bar, an error is printed and an EOF lexeme is produced instead.
   */
  thisAgent->lexeme.type = SYM_CONSTANT_LEXEME;
  get_next_char(thisAgent);   /* consume the opening bar */
  do {
    if ((thisAgent->current_char==EOF)||
        (thisAgent->lexeme.length==MAX_LEXEME_LENGTH)) {
      print (thisAgent, "Error: opening '|' without closing '|'\n");
      print_location_of_most_recent_lexeme(thisAgent);
      /* BUGBUG if reading from top level, don't want to signal EOF */
      thisAgent->lexeme.type = EOF_LEXEME;
      thisAgent->lexeme.string[0]=EOF;
      thisAgent->lexeme.string[1]=0;
      thisAgent->lexeme.length = 1;
      return;
    }
    if (thisAgent->current_char=='\\') {
      get_next_char(thisAgent);   /* skip the backslash, keep what follows */
      thisAgent->lexeme.string[thisAgent->lexeme.length++] = char(thisAgent->current_char);
      get_next_char(thisAgent);
    } else if (thisAgent->current_char=='|') {
      get_next_char(thisAgent);   /* consume the closing bar */
      break;
    } else {
      thisAgent->lexeme.string[thisAgent->lexeme.length++] = char(thisAgent->current_char);
      get_next_char(thisAgent);
    }
  } while(TRUE);
  thisAgent->lexeme.string[thisAgent->lexeme.length]=0;
}
void lexer_routines ( agent * )

Referenced by get_lexeme(), and init_lexer().

void print_location_of_most_recent_lexeme ( agent * thisAgent)

Definition at line 1022 of file lexer.cpp.

References lexer_source_file_struct::buffer, lexer_source_file_struct::column_of_start_of_last_lexeme, agent_struct::current_char, agent_struct::current_file, lexer_source_file_struct::current_line, lexer_source_file_struct::filename, lexer_source_file_struct::line_of_start_of_last_lexeme, agent_struct::load_errors_quit, print(), print_string(), and reading_from_top_level().

Referenced by determine_type_of_constituent_string(), lex_quote(), lex_vbar(), parse_attr_value_make(), parse_attr_value_tests(), parse_cond(), parse_disjunction_test(), parse_function_call_after_lparen(), parse_head_of_conds_for_one_id(), parse_preferences_soar8_non_operator(), parse_production(), parse_relational_test(), parse_rhs_action(), parse_rhs_value(), and read_identifier_or_context_variable().

{
  /*
   * Report where the most recent lexeme started.  If it began on the line
   * still held in the buffer, that line is printed with a "----^" marker
   * under the starting column; otherwise only file, line and column are
   * printed.  When not reading from the top level and load_errors_quit is
   * set, current_char is forced to EOF.
   */
  int i;

  if (thisAgent->current_file->line_of_start_of_last_lexeme ==
      thisAgent->current_file->current_line) {
    /* --- error occurred on current line, so print out the line --- */
    if (! reading_from_top_level(thisAgent)) {
      print (thisAgent, "File %s, line %lu:\n", thisAgent->current_file->filename,
             static_cast<unsigned long>(thisAgent->current_file->current_line));
      /* respond_to_load_errors (); AGR 527a */
    }
    /* Guard the empty buffer: strlen()-1 would otherwise index before it. */
    const size_t buffer_length = strlen(thisAgent->current_file->buffer);
    if ((buffer_length > 0) &&
        (thisAgent->current_file->buffer[buffer_length-1]=='\n'))
      print_string (thisAgent, thisAgent->current_file->buffer);
    else
      print (thisAgent, "%s\n",thisAgent->current_file->buffer);
    for (i=0; i<thisAgent->current_file->column_of_start_of_last_lexeme; i++)
      print_string (thisAgent, "-");
    print_string (thisAgent, "^\n");
    if (! reading_from_top_level(thisAgent)) {
      //respond_to_load_errors (thisAgent); /* AGR 527a */
      if (thisAgent->load_errors_quit)
        thisAgent->current_char = EOF;
    }
    /* AGR 527a The respond_to_load_errors call came too early (above),
       and the "continue" prompt appeared before the offending line was printed
       out, so the respond_to_load_errors call was moved here.
       AGR 26-Apr-94 */
  } else {
    /* --- error occurred on a previous line, so just give the position --- */
    print (thisAgent, "File %s, line %lu, column %lu.\n", thisAgent->current_file->filename,
           static_cast<unsigned long>(thisAgent->current_file->line_of_start_of_last_lexeme),
           static_cast<unsigned long>(thisAgent->current_file->column_of_start_of_last_lexeme));
    if (! reading_from_top_level(thisAgent)) {
      //respond_to_load_errors (thisAgent);
      if (thisAgent->load_errors_quit)
        thisAgent->current_char = EOF;
    }
  }
}
void read_constituent_string ( agent *  thisAgent)

Definition at line 266 of file lexer.cpp.

References constituent_char, agent_struct::current_char, finish(), and store_and_advance().

Referenced by lex_ampersand(), lex_constituent_string(), lex_digit(), lex_equal(), lex_greater(), lex_less(), lex_minus(), and lex_plus().

{
  /* Append characters to the current lexeme (via store_and_advance()) for as
     long as the current character is not EOF and its constituent_char[]
     entry is set, then call finish().  The cast to unsigned char keeps the
     index into the 256-entry constituent_char table non-negative.

     The __SC__-only locals (buf, i, len) that used to be declared here were
     never used by this function and have been dropped. */
  while (thisAgent->current_char != EOF) {
    const unsigned char table_index =
        static_cast<unsigned char>(thisAgent->current_char);
    if (!constituent_char[table_index]) break;
    store_and_advance(thisAgent);
  }
  finish(thisAgent);
}
void read_rest_of_floating_point_number ( agent *  thisAgent)

Definition at line 278 of file lexer.cpp.

References allocate_memory(), agent_struct::current_char, finish(), free_memory_block_for_string(), agent_struct::lexeme, store_and_advance(), lexeme_info::string, and STRING_MEM_USAGE.

Referenced by lex_digit(), lex_minus(), lex_period(), and lex_plus().

{
  /* --- at entry, current_char=="."; we read the "." and rest of number --- */
  store_and_advance(thisAgent);
  while (isdigit(thisAgent->current_char)) store_and_advance(thisAgent); /* string of digits */

  /* Optional exponent: 'e' or 'E', an optional sign, then digits. */
  if ((thisAgent->current_char=='e')||(thisAgent->current_char=='E')) {
    store_and_advance(thisAgent); /* E */
    if ((thisAgent->current_char=='+')||(thisAgent->current_char=='-'))
      store_and_advance(thisAgent); /* optional leading + or - */
    while (isdigit(thisAgent->current_char)) store_and_advance(thisAgent); /* string of digits */
  }
  finish(thisAgent);

#ifdef __SC__
  /* If the lexeme is not exactly "soar>" but begins with "soar>", then SIOW
     messed up, so drop that 5-character prefix from the lexeme string.

     Fixes relative to the previous version of this branch:
       - the scratch variables are declared here (they were not declared in
         this function at all);
       - the copy loops stop at len-1, the terminating NUL, instead of
         touching index len, one byte past the allocation;
       - the scratch buffer is released once the shift is done. */
  if (strcmp("soar>",thisAgent->lexeme.string) &&
      !strncmp("soar>",thisAgent->lexeme.string,5)) {
    const size_t len = strlen(thisAgent->lexeme.string)+1; /* includes the NUL */
    char *buf = static_cast<char *>(
        allocate_memory(thisAgent, len*sizeof(char), STRING_MEM_USAGE));
    size_t i;

    for (i=0;i<len;i++) {
      buf[i] = thisAgent->lexeme.string[i];
    }
    /* len >= 7 here, so the shifted copy always carries the NUL along. */
    for (i=5;i<len;i++) {
      thisAgent->lexeme.string[i-5] = buf[i];
    }
    free_memory_block_for_string(thisAgent, buf);
  }
#endif
}
void record_position_of_start_of_lexeme ( agent *  thisAgent)
inline
void set_lexer_allow_ids ( agent *  thisAgent,
Bool  allow_identifiers 
)

Definition at line 1105 of file lexer.cpp.

References lexer_source_file_struct::allow_ids, and agent_struct::current_file.

Referenced by parse_lti(), and smem_parse_chunks().

{
  /* Store the caller's choice in the allow_ids flag of the agent's
     current lexer source file. */
  lexer_source_file_struct *active_file = thisAgent->current_file;
  active_file->allow_ids = allow_identifiers;
}
void skip_ahead_to_balanced_parentheses ( agent *  thisAgent,
int  parentheses_level 
)

Definition at line 1082 of file lexer.cpp.

References agent_struct::current_file, EOF_LEXEME, get_lexeme(), agent_struct::lexeme, lexer_source_file_struct::parentheses_level, R_PAREN_LEXEME, TRUE, and lexeme_info::type.

{
  /* Keep fetching lexemes until the current lexeme is either EOF or a
     right parenthesis seen while the current file's parentheses_level
     equals the requested parentheses_level.  If the current lexeme already
     satisfies that on entry, nothing is read. */
  while ((thisAgent->lexeme.type != EOF_LEXEME) &&
         !((thisAgent->lexeme.type == R_PAREN_LEXEME) &&
           (parentheses_level == thisAgent->current_file->parentheses_level))) {
    get_lexeme(thisAgent);
  }
}
void start_lex_from_file ( agent *  thisAgent,
const char *  filename,
FILE *  already_opened_file 
)

Definition at line 94 of file lexer.cpp.

References allocate_memory(), lexer_source_file_struct::allow_ids, lexer_source_file_struct::buffer, lexer_source_file_struct::column_of_start_of_last_lexeme, agent_struct::current_char, lexer_source_file_struct::current_column, agent_struct::current_file, lexer_source_file_struct::current_line, lexer_source_file_struct::fake_rparen_at_eol, FALSE, lexer_source_file_struct::file, lexer_source_file_struct::filename, agent_struct::lexeme, lexer_source_file_struct::line_of_start_of_last_lexeme, make_memory_block_for_string(), MISCELLANEOUS_MEM_USAGE, lexer_source_file_struct::parent_file, lexer_source_file_struct::parentheses_level, lexer_source_file_struct::saved_current_char, lexer_source_file_struct::saved_lexeme, and TRUE.

Referenced by init_lexer(), and load_file().

{
/* Makes a newly allocated lexer source-file record the agent's current file.
   NOTE(review): this listing looks truncated -- the declaration of lsf and the
   tail of the allocate_memory() call are not shown, and some fields named in
   the References list above are never assigned here; confirm against lexer.cpp. */
/* --- allocate the record for the new source file --- */
lsf = static_cast<lexer_source_file_struct *>(allocate_memory (thisAgent, sizeof(lexer_source_file),
/* --- save the agent's current lexeme and character in the new record --- */
lsf->saved_lexeme = thisAgent->lexeme;
lsf->saved_current_char = thisAgent->current_char;
/* --- link the previous current file as parent and make the new record current --- */
lsf->parent_file = thisAgent->current_file;
thisAgent->current_file = lsf;
/* --- keep a copy of the filename and the caller's already-opened FILE pointer --- */
lsf->filename = make_memory_block_for_string (thisAgent, filename);
lsf->file = already_opened_file;
lsf->allow_ids = TRUE;
/* --- start at line 0, column 0 with an empty line buffer --- */
lsf->current_line = 0;
lsf->current_column = 0;
lsf->buffer[0] = 0;
thisAgent->current_char = ' '; /* whitespace--to force immediate read of first line */
}
void stop_lex_from_file ( agent *  thisAgent)
void store_and_advance ( agent *  thisAgent)
inline

Variable Documentation

Bool constituent_char[256]
Bool number_starters[256]

Definition at line 81 of file lexer.cpp.

Referenced by determine_possible_symbol_types_for_string(), and init_lexer().

Bool whitespace[256]

Definition at line 80 of file lexer.cpp.

Referenced by get_lexeme(), and init_lexer().