Soar Kernel  9.3.2 08-06-12
reinforcement_learning.cpp File Reference
#include <portability.h>
#include <cstdlib>
#include <cmath>
#include <vector>
#include <fstream>
#include <sstream>
#include "agent.h"
#include "reinforcement_learning.h"
#include "production.h"
#include "rhsfun.h"
#include "instantiations.h"
#include "rete.h"
#include "wmem.h"
#include "tempmem.h"
#include "print.h"
#include "xml.h"
#include "utilities.h"
#include "recmem.h"


Functions

Symbol * instantiate_rhs_value (agent *thisAgent, rhs_value rv, goal_stack_level new_id_level, char new_id_letter, struct token_struct *tok, wme *w)
void rl_add_goal_or_impasse_tests_to_conds (agent *my_agent, condition *all_conds)
void rl_add_ref (Symbol *goal, production *prod)
Symbol * rl_build_template_instantiation (agent *my_agent, instantiation *my_template_instance, struct token_struct *tok, wme *w)
void rl_clear_refs (Symbol *goal)
bool rl_enabled (agent *my_agent)
void rl_get_symbol_constant (Symbol *p_sym, Symbol *i_sym, rl_symbol_map *constants)
void rl_get_template_constants (condition *p_conds, condition *i_conds, rl_symbol_map *constants)
int rl_get_template_id (const char *prod_name)
void rl_get_test_constant (test *p_test, test *i_test, rl_symbol_map *constants)
void rl_initialize_template_tracking (agent *my_agent)
action * rl_make_simple_action (agent *my_agent, Symbol *id_sym, Symbol *attr_sym, Symbol *val_sym, Symbol *ref_sym)
int rl_next_template_id (agent *my_agent)
void rl_perform_update (agent *my_agent, double op_value, bool op_rl, Symbol *goal, bool update_efr)
void rl_remove_ref (Symbol *goal, production *prod)
void rl_remove_refs_for_prod (agent *my_agent, production *prod)
void rl_reset_data (agent *)
void rl_revert_template_id (agent *my_agent)
void rl_rule_meta (agent *my_agent, production *prod)
void rl_store_data (agent *my_agent, Symbol *goal, preference *cand)
void rl_tabulate_reward_value_for_goal (agent *my_agent, Symbol *goal)
void rl_tabulate_reward_values (agent *my_agent)
void rl_update_template_tracking (agent *my_agent, const char *rule_name)
bool rl_valid_rule (production *prod)
bool rl_valid_template (production *prod)
void rl_watkins_clear (agent *, Symbol *goal)
void variablize_condition_list (agent *thisAgent, condition *cond)
void variablize_nots_and_insert_into_conditions (agent *thisAgent, not_struct *nots, condition *conds)
void variablize_symbol (agent *thisAgent, Symbol **sym)

Function Documentation

Symbol* instantiate_rhs_value ( agent *  thisAgent,
rhs_value  rv,
goal_stack_level  new_id_level,
char  new_id_letter,
struct token_struct *  tok,
wme *  w 
)

Definition at line 208 of file recmem.cpp.

References allocate_cons(), identifier_struct::common_symbol_info, rhs_function_struct::f, FALSE, agent_struct::firer_highest_rhs_unboundvar_index, cons_struct::first, free_list(), get_symbol_from_rete_loc(), symbol_union::id, IDENTIFIER_SYMBOL_TYPE, instantiate_rhs_value(), identifier_struct::level, make_new_identifier(), variable_struct::name, NIL, identifier_struct::promotion_level, cons_struct::rest, rhs_value_is_reteloc(), rhs_value_is_symbol(), rhs_value_is_unboundvar(), rhs_value_to_funcall_list(), rhs_value_to_reteloc_field_num(), rhs_value_to_reteloc_levels_up(), rhs_value_to_symbol(), rhs_value_to_unboundvar(), agent_struct::rhs_variable_bindings, identifier_struct::smem_lti, SMEM_LTI_UNKNOWN_LEVEL, symbol_add_ref(), symbol_remove_ref(), symbol_common_data_struct::symbol_type, agent_struct::timers_cpu, agent_struct::timers_kernel, agent_struct::timers_total_cpu_time, agent_struct::timers_total_kernel_time, TRUE, rhs_function_struct::user_data, symbol_union::var, and VARIABLE_SYMBOL_TYPE.

Referenced by execute_action(), instantiate_rhs_value(), and rl_build_template_instantiation().

{
list *fl;
list *arglist;
cons *c, *prev_c, *arg_cons;
rhs_function *rf;
Symbol *result;
Bool nil_arg_found;
if (rhs_value_is_symbol(rv)) {
result = rhs_value_to_symbol(rv);
/*
Long-Winded Case-by-Case [Hopeful] Explanation
This has to do with long-term identifiers (LTIs) that exist within productions (including chunks/justifications).
The real issue is that identifiers, upon creation, require a goal level (used for promotion/demotion/garbage collection).
At the time of parsing a rule, we don't have this information, so we give it an invalid "unknown" value.
This is OK on the condition side of a rule, since the rete (we think) will just consider it another symbol used for matching.
However, it becomes hairy when LTIs are on the action side of a rule, with respect to the state of the LTI in working memory and the rule LHS.
Consider the following cases:
1. Identifier is LTI, does NOT exist as a LHS symbol
- we do NOT support this!!! bad things will likely happen due to potential for adding an identifier to working memory
with an unknown goal level.
2. Attribute/Value is LTI, does NOT exist as a LHS symbol (!!!!!IMPORTANT CASE!!!!!)
- the caller of this function will supply new_id_level (probably based upon the level of the id).
- if this is valid (i.e. greater than 0), we use it. else, ignore.
- we have a huge assert on add_wme_to_wm that will kill soar if we try to add an identifier to working memory with an invalid level.
3. Identifier/Attribute/Value is LTI, DOES exist as LHS symbol
- in this situation, we are *guaranteed* that the resulting LTI (since it is in WM) has a valid goal level.
- it should be noted that if the LTI is a value, its level may change during promotion/demotion/garbage collection,
but this is natural Soar behavior and outside our purview.
*/
if ( ( result->common.symbol_type == IDENTIFIER_SYMBOL_TYPE ) &&
( result->id.smem_lti != NIL ) &&
( result->id.level == SMEM_LTI_UNKNOWN_LEVEL ) &&
( new_id_level > 0 ) )
{
result->id.level = new_id_level;
result->id.promotion_level = new_id_level;
}
symbol_add_ref (result);
return result;
}
if (rhs_value_is_unboundvar(rv)) {
int64_t index;
Symbol *sym;
index = static_cast<int64_t>(rhs_value_to_unboundvar(rv));
if (thisAgent->firer_highest_rhs_unboundvar_index < index)
thisAgent->firer_highest_rhs_unboundvar_index = index;
sym = *(thisAgent->rhs_variable_bindings+index);
if (!sym) {
sym = make_new_identifier (thisAgent, new_id_letter, new_id_level);
*(thisAgent->rhs_variable_bindings+index) = sym;
return sym;
} else if (sym->common.symbol_type==VARIABLE_SYMBOL_TYPE) {
new_id_letter = *(sym->var.name + 1);
sym = make_new_identifier (thisAgent, new_id_letter, new_id_level);
*(thisAgent->rhs_variable_bindings+index) = sym;
return sym;
} else {
return sym;
}
}
if (rhs_value_is_reteloc(rv)) {
result = get_symbol_from_rete_loc (rhs_value_to_reteloc_levels_up(rv),
rhs_value_to_reteloc_field_num(rv),
tok, w);
symbol_add_ref (result);
return result;
}
fl = rhs_value_to_funcall_list(rv);
rf = static_cast<rhs_function_struct *>(fl->first);
/* --- build up a list of the argument values --- */
prev_c = NIL;
nil_arg_found = FALSE;
arglist = NIL; /* unnecessary, but gcc -Wall warns without it */
for (arg_cons=fl->rest; arg_cons!=NIL; arg_cons=arg_cons->rest) {
allocate_cons (thisAgent, &c);
c->first = instantiate_rhs_value (thisAgent,
static_cast<char *>(arg_cons->first),
new_id_level, new_id_letter, tok, w);
if (! c->first) nil_arg_found = TRUE;
if (prev_c) prev_c->rest = c; else arglist = c;
prev_c = c;
}
if (prev_c) prev_c->rest = NIL; else arglist = NIL;
/* --- if all args were ok, call the function --- */
if (!nil_arg_found) {
// stop the kernel timer while doing RHS funcalls KJC 11/04
// the total_cpu timer needs to be updated in case RHS fun is statsCmd
#ifndef NO_TIMING_STUFF
thisAgent->timers_kernel.stop();
thisAgent->timers_cpu.stop();
thisAgent->timers_total_kernel_time.update(thisAgent->timers_kernel);
thisAgent->timers_total_cpu_time.update(thisAgent->timers_cpu);
thisAgent->timers_cpu.start();
#endif
result = (*(rf->f))(thisAgent, arglist, rf->user_data);
#ifndef NO_TIMING_STUFF // restart the kernel timer
thisAgent->timers_kernel.start();
#endif
} else
result = NIL;
/* --- scan through arglist, dereference symbols and deallocate conses --- */
for (c=arglist; c!=NIL; c=c->rest)
if (c->first) symbol_remove_ref (thisAgent, static_cast<Symbol *>(c->first));
free_list (thisAgent, arglist);
return result;
}
void rl_add_goal_or_impasse_tests_to_conds ( agent *  my_agent,
condition *  all_conds 
)

Definition at line 692 of file reinforcement_learning.cpp.

References add_new_test_to_test(), agent_struct::complex_test_pool, get_new_tc_number(), GOAL_ID_TEST, wme_struct::id, symbol_union::id, IMPASSE_ID_TEST, identifier_struct::isa_goal, identifier_struct::isa_impasse, make_test_from_complex_test(), NIL, POSITIVE_CONDITION, referent_of_equality_test(), identifier_struct::tc_num, and complex_test_struct::type.

Referenced by rl_build_template_instantiation().

{
// mark each id as we add a test for it, so we don't add a test for the same id in two different places
Symbol *id;
test t;
complex_test *ct;
tc_number tc = get_new_tc_number( my_agent );
for ( condition *cond = all_conds; cond != NIL; cond = cond->next )
{
if ( cond->type != POSITIVE_CONDITION )
continue;
id = referent_of_equality_test( cond->data.tests.id_test );
if ( ( id->id.isa_goal || id->id.isa_impasse ) && ( id->id.tc_num != tc ) )
{
allocate_with_pool( my_agent, &my_agent->complex_test_pool, &ct );
ct->type = static_cast<byte>( ( id->id.isa_goal )?( GOAL_ID_TEST ):( IMPASSE_ID_TEST ) );
t = make_test_from_complex_test( ct );
add_new_test_to_test( my_agent, &( cond->data.tests.id_test ), t );
id->id.tc_num = tc;
}
}
}
void rl_add_ref ( Symbol *  goal,
production *  prod 
)
inline
Symbol* rl_build_template_instantiation ( agent *  my_agent,
instantiation *  my_template_instance,
struct token_struct *  tok,
wme *  w 
)

Definition at line 540 of file reinforcement_learning.cpp.

References production_struct::action_list, add_production_to_rete(), wme_struct::attr, action_struct::attr, copy_condition_list(), deallocate_condition_list(), DUPLICATE_PRODUCTION, excise_production(), FALSE, symbol_union::fc, find_sym_constant(), first_letter_from_symbol(), FLOAT_CONSTANT_SYMBOL_TYPE, get_new_tc_number(), symbol_union::ic, wme_struct::id, action_struct::id, symbol_union::id, instantiate_rhs_value(), INT_CONSTANT_SYMBOL_TYPE, identifier_struct::level, make_production(), make_sym_constant(), production_struct::name, sym_constant_struct::name, NIL, instantiation_struct::nots, NUMERIC_INDIFFERENT_PREFERENCE_TYPE, production_struct::p_node, p_node_to_conditions_and_nots(), action_struct::preference_type, instantiation_struct::prod, action_struct::referent, reset_variable_generator(), rl_add_goal_or_impasse_tests_to_conds(), production_struct::rl_ecr, production_struct::rl_efr, rl_get_template_constants(), rl_make_simple_action(), rl_next_template_id(), rl_revert_template_id(), production_struct::rl_template_conds, production_struct::rl_template_instantiations, symbol_union::sc, symbol_remove_ref(), instantiation_struct::top_of_instantiated_conditions, TRUE, USER_PRODUCTION_TYPE, wme_struct::value, int_constant_struct::value, float_constant_struct::value, action_struct::value, agent_struct::variablization_tc, variablize_condition_list(), and variablize_nots_and_insert_into_conditions().

Referenced by create_instantiation().

{
Symbol* return_val = NULL;
// initialize production conditions
if ( my_template_instance->prod->rl_template_conds == NIL )
{
not_struct* nots;
condition* c_top;
condition* c_bottom;
p_node_to_conditions_and_nots( my_agent, my_template_instance->prod->p_node, NIL, NIL, &( c_top ), &( c_bottom ), &( nots ), NIL );
my_template_instance->prod->rl_template_conds = c_top;
}
// initialize production instantiation set
if ( my_template_instance->prod->rl_template_instantiations == NIL )
{
my_template_instance->prod->rl_template_instantiations = new rl_symbol_map_set;
}
// get constants
rl_symbol_map constant_map;
{
rl_get_template_constants( my_template_instance->prod->rl_template_conds, my_template_instance->top_of_instantiated_conditions, &( constant_map ) );
}
// try to insert into instantiation set
//if ( !constant_map.empty() )
{
std::pair< rl_symbol_map_set::iterator, bool > ins_result = my_template_instance->prod->rl_template_instantiations->insert( constant_map );
if ( ins_result.second )
{
Symbol *id, *attr, *value, *referent;
production *my_template = my_template_instance->prod;
action *my_action = my_template->action_list;
char first_letter;
double init_value = 0;
condition *cond_top, *cond_bottom;
// make unique production name
Symbol *new_name_symbol;
std::string new_name = "";
std::string empty_string = "";
std::string temp_id;
int new_id;
do
{
new_id = rl_next_template_id( my_agent );
to_string( new_id, temp_id );
new_name = ( "rl*" + empty_string + my_template->name->sc.name + "*" + temp_id );
} while ( find_sym_constant( my_agent, new_name.c_str() ) != NIL );
new_name_symbol = make_sym_constant( my_agent, new_name.c_str() );
// prep conditions
copy_condition_list( my_agent, my_template_instance->top_of_instantiated_conditions, &cond_top, &cond_bottom );
reset_variable_generator( my_agent, cond_top, NIL );
my_agent->variablization_tc = get_new_tc_number( my_agent );
variablize_condition_list( my_agent, cond_top );
variablize_nots_and_insert_into_conditions( my_agent, my_template_instance->nots, cond_top );
// get the preference value
id = instantiate_rhs_value( my_agent, my_action->id, -1, 's', tok, w );
attr = instantiate_rhs_value( my_agent, my_action->attr, id->id.level, 'a', tok, w );
first_letter = first_letter_from_symbol( attr );
value = instantiate_rhs_value( my_agent, my_action->value, id->id.level, first_letter, tok, w );
referent = instantiate_rhs_value( my_agent, my_action->referent, id->id.level, first_letter, tok, w );
// clean up after yourself :)
symbol_remove_ref( my_agent, id );
symbol_remove_ref( my_agent, attr );
symbol_remove_ref( my_agent, value );
symbol_remove_ref( my_agent, referent );
// make new action list
action *new_action = rl_make_simple_action( my_agent, id, attr, value, referent );
// make new production
production *new_production = make_production( my_agent, USER_PRODUCTION_TYPE, new_name_symbol, &cond_top, &cond_bottom, &new_action, false );
// set initial expected reward values
{
if ( referent->common.symbol_type == INT_CONSTANT_SYMBOL_TYPE )
{
init_value = static_cast< double >( referent->ic.value );
}
else if ( referent->common.symbol_type == FLOAT_CONSTANT_SYMBOL_TYPE )
{
init_value = referent->fc.value;
}
new_production->rl_ecr = 0.0;
new_production->rl_efr = init_value;
}
// attempt to add to rete, remove if duplicate
if ( add_production_to_rete( my_agent, new_production, cond_top, NULL, FALSE, TRUE ) == DUPLICATE_PRODUCTION )
{
excise_production( my_agent, new_production, false );
rl_revert_template_id( my_agent );
new_name_symbol = NULL;
}
deallocate_condition_list( my_agent, cond_top );
return_val = new_name_symbol;
}
}
return return_val;
}
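For context, a hedged sketch of the kind of rule this function expands (Soar production syntax; the rule and attribute names are illustrative, not taken from the kernel source). An RL template is declared with the :template flag and carries a variable numeric-indifferent referent:

sp {my-template
   :template
   (state <s> ^operator <o> +)
   (<o> ^name move ^distance <d>)
-->
   (<s> ^operator <o> = <d>)}

Each novel binding of the template's constants produces a fully grounded RL rule named by the loop above (e.g. rl*my-template*1); the instantiated referent, here the matched value of <d>, seeds the new rule's rl_efr.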
void rl_clear_refs ( Symbol *  goal)

Definition at line 253 of file reinforcement_learning.cpp.

References symbol_union::id, rl_data_struct::prev_op_rl_rules, and identifier_struct::rl_info.

Referenced by remove_existing_context_and_descendents(), rl_reset_data(), and rl_store_data().

{
rl_rule_list *rules = goal->id.rl_info->prev_op_rl_rules;
for ( rl_rule_list::iterator p=rules->begin(); p!=rules->end(); p++ )
{
(*p)->rl_ref_count--;
}
rules->clear();
}
bool rl_enabled ( agent *  my_agent)
void rl_get_symbol_constant ( Symbol *  p_sym,
Symbol *  i_sym,
rl_symbol_map *  constants 
)
inline

Definition at line 454 of file reinforcement_learning.cpp.

References symbol_union::id, IDENTIFIER_SYMBOL_TYPE, NIL, identifier_struct::smem_lti, and VARIABLE_SYMBOL_TYPE.

Referenced by rl_get_test_constant().

{
if ( ( p_sym->common.symbol_type == VARIABLE_SYMBOL_TYPE ) && ( ( i_sym->common.symbol_type != IDENTIFIER_SYMBOL_TYPE ) || ( i_sym->id.smem_lti != NIL ) ) )
{
constants->insert( std::make_pair< Symbol*, Symbol* >( p_sym, i_sym ) );
}
}
void rl_get_template_constants ( condition *  p_conds,
condition *  i_conds,
rl_symbol_map *  constants 
)
int rl_get_template_id ( const char *  prod_name)

Definition at line 395 of file reinforcement_learning.cpp.

References wme_struct::id, and is_whole_number().

Referenced by rl_update_template_tracking().

{
std::string temp = prod_name;
// has to be at least "rl*a*#" (where a is a single letter/number/etc)
if ( temp.length() < 6 )
return -1;
// check first three letters are "rl*"
if ( temp.compare( 0, 3, "rl*" ) )
return -1;
// find last * to isolate id
std::string::size_type last_star = temp.find_last_of( '*' );
if ( last_star == std::string::npos )
return -1;
// make sure there's something left after last_star
if ( last_star == ( temp.length() - 1 ) )
return -1;
// make sure id is a valid natural number
std::string id_str = temp.substr( last_star + 1 );
if ( !is_whole_number( id_str ) )
return -1;
// convert id
int id;
from_string( id, id_str );
return id;
}
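A minimal standalone sketch of the same naming convention (parse_template_id is hypothetical, not a kernel function; the kernel version above additionally routes through is_whole_number() and from_string()):

#include <string>
#include <cctype>
#include <cstdlib>

// Returns the trailing id of a name of the form "rl*<template>*<id>", else -1.
static int parse_template_id( const std::string &name )
{
    if ( name.length() < 6 ) return -1;                            // at least "rl*a*#"
    if ( name.compare( 0, 3, "rl*" ) != 0 ) return -1;             // must begin with "rl*"
    std::string::size_type last_star = name.find_last_of( '*' );
    if ( last_star == std::string::npos ) return -1;
    if ( last_star == ( name.length() - 1 ) ) return -1;           // something must follow the last '*'
    for ( std::string::size_type i = last_star + 1; i < name.length(); ++i )
        if ( !isdigit( static_cast<unsigned char>( name[ i ] ) ) ) // id must be a natural number
            return -1;
    return atoi( name.c_str() + last_star + 1 );
}

// parse_template_id( "rl*my-template*12" ) == 12
// parse_template_id( "rl*my-template*" )   == -1
// parse_template_id( "foo*3" )             == -1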
void rl_get_test_constant ( test *  p_test,
test *  i_test,
rl_symbol_map *  constants 
)

Definition at line 462 of file reinforcement_learning.cpp.

References rl_get_symbol_constant(), test_is_blank_or_equality_test(), and test_is_blank_test().

Referenced by rl_get_template_constants().

{
if ( test_is_blank_test( *p_test ) )
{
return;
}
if ( test_is_blank_or_equality_test( *p_test ) )
{
rl_get_symbol_constant( *(reinterpret_cast<Symbol**>( p_test )), *(reinterpret_cast<Symbol**>( i_test )), constants );
return;
}
// complex test stuff
// NLD: If the code below is uncommented, it accesses bad memory on the first
// id test and segfaults. I'm honestly unsure why (perhaps something
// about state test?). Most of this code was copied/adapted from
// the variablize_test code in production.cpp.
/*
{
complex_test* p_ct = complex_test_from_test( *p_test );
complex_test* i_ct = complex_test_from_test( *i_test );
if ( ( p_ct->type == GOAL_ID_TEST ) || ( p_ct->type == IMPASSE_ID_TEST ) || ( p_ct->type == DISJUNCTION_TEST ) )
{
return;
}
else if ( p_ct->type == CONJUNCTIVE_TEST )
{
cons* p_c=p_ct->data.conjunct_list;
cons* i_c=i_ct->data.conjunct_list;
while ( p_c )
{
rl_get_test_constant( reinterpret_cast<test*>( &( p_c->first ) ), reinterpret_cast<test*>( &( i_c->first ) ), constants );
p_c = p_c->rest;
i_c = i_c->rest;
}
return;
}
else
{
rl_get_symbol_constant( p_ct->data.referent, i_ct->data.referent, constants );
return;
}
}
*/
}
void rl_initialize_template_tracking ( agent *  my_agent)

Definition at line 428 of file reinforcement_learning.cpp.

References agent_struct::rl_template_count.

Referenced by create_soar_agent().

{
my_agent->rl_template_count = 1;
}
action* rl_make_simple_action ( agent *  my_agent,
Symbol *  id_sym,
Symbol *  attr_sym,
Symbol *  val_sym,
Symbol *  ref_sym 
)

Definition at line 656 of file reinforcement_learning.cpp.

References agent_struct::action_pool, action_struct::attr, action_struct::id, MAKE_ACTION, action_struct::next, NIL, action_struct::referent, symbol_add_ref(), symbol_to_rhs_value(), action_struct::type, action_struct::value, and variablize_symbol().

Referenced by rl_build_template_instantiation().

{
action *rhs;
Symbol *temp;
allocate_with_pool( my_agent, &my_agent->action_pool, &rhs );
rhs->next = NIL;
rhs->type = MAKE_ACTION;
// id
temp = id_sym;
symbol_add_ref( temp );
variablize_symbol( my_agent, &temp );
rhs->id = symbol_to_rhs_value( temp );
// attribute
temp = attr_sym;
symbol_add_ref( temp );
variablize_symbol( my_agent, &temp );
rhs->attr = symbol_to_rhs_value( temp );
// value
temp = val_sym;
symbol_add_ref( temp );
variablize_symbol( my_agent, &temp );
rhs->value = symbol_to_rhs_value( temp );
// referent
temp = ref_sym;
symbol_add_ref( temp );
variablize_symbol( my_agent, &temp );
rhs->referent = symbol_to_rhs_value( temp );
return rhs;
}
int rl_next_template_id ( agent *  my_agent)

Definition at line 443 of file reinforcement_learning.cpp.

References agent_struct::rl_template_count.

Referenced by rl_build_template_instantiation().

{
return (my_agent->rl_template_count++);
}
void rl_perform_update ( agent *  my_agent,
double  op_value,
bool  op_rl,
Symbol *  goal,
bool  update_efr 
)

Definition at line 849 of file reinforcement_learning.cpp.

References production_struct::action_list, rl_param_container::decay_mode, rl_param_container::delta_bar_delta_decay, rl_param_container::discount_rate, production_struct::documentation, rl_data_struct::eligibility_traces, rl_param_container::et_decay_rate, rl_param_container::et_tolerance, rl_param_container::exponential_decay, free_memory_block_for_string(), rl_data_struct::gap_age, rl_param_container::get_documentation_params(), soar_module::primitive_param< T >::get_value(), soar_module::string_param::get_value(), soar_module::constant_param< T >::get_value(), rl_data_struct::hrl_age, symbol_union::id, preference_struct::inst_next, production_struct::instantiations, rl_param_container::learning_rate, rl_param_container::logarithmic_decay, make_float_constant(), make_memory_block_for_string(), rl_param_container::meta, rl_param_container::meta_learning_rate, production_struct::name, sym_constant_struct::name, identifier_struct::name_letter, identifier_struct::name_number, instantiation_struct::next, rl_param_container::normal_decay, soar_module::on, rl_data_struct::prev_op_rl_rules, print(), action_struct::referent, rl_data_struct::reward, rhs_value_to_symbol(), production_struct::rl_delta_bar_delta_beta, production_struct::rl_delta_bar_delta_h, production_struct::rl_ecr, production_struct::rl_efr, identifier_struct::rl_info, agent_struct::rl_params, production_struct::rl_update_count, symbol_union::sc, symbol_remove_ref(), symbol_to_rhs_value(), agent_struct::sysparams, rl_param_container::temporal_discount, rl_param_container::temporal_extension, TRACE_RL_SYSPARAM, rl_param_container::update_log_path, xml_generate_message(), and xml_generate_warning().

Referenced by do_one_top_level_phase(), exploration_choose_according_to_policy(), remove_existing_context_and_descendents(), require_preference_semantics(), and run_preference_semantics().

{
bool using_gaps = ( my_agent->rl_params->temporal_extension->get_value() == soar_module::on );
if ( !using_gaps || op_rl )
{
rl_data *data = goal->id.rl_info;
if ( !data->prev_op_rl_rules->empty() )
{
rl_et_map::iterator iter;
double alpha = my_agent->rl_params->learning_rate->get_value();
double lambda = my_agent->rl_params->et_decay_rate->get_value();
double gamma = my_agent->rl_params->discount_rate->get_value();
double tolerance = my_agent->rl_params->et_tolerance->get_value();
double theta = my_agent->rl_params->meta_learning_rate->get_value();
// if temporal_discount is off, don't discount for gaps
unsigned int effective_age = data->hrl_age + 1;
if ( my_agent->rl_params->temporal_discount->get_value() == soar_module::on )
{
effective_age += data->gap_age;
}
double discount = pow( gamma, static_cast< double >( effective_age ) );
// notify of gap closure
if ( data->gap_age && using_gaps && my_agent->sysparams[ TRACE_RL_SYSPARAM ] )
{
char buf[256];
SNPRINTF( buf, 254, "gap ended (%c%llu)", goal->id.name_letter, static_cast<long long unsigned>(goal->id.name_number) );
print( my_agent, buf );
xml_generate_warning( my_agent, buf );
}
// Iterate through eligibility_traces, decay traces. If less than TOLERANCE, remove from map.
if ( lambda == 0 )
{
if ( !data->eligibility_traces->empty() )
{
data->eligibility_traces->clear();
}
}
else
{
for ( iter = data->eligibility_traces->begin(); iter != data->eligibility_traces->end(); )
{
iter->second *= lambda;
iter->second *= discount;
if ( iter->second < tolerance )
{
data->eligibility_traces->erase( iter++ );
}
else
{
++iter;
}
}
}
// Update trace for just fired prods
double sum_old_ecr = 0.0;
double sum_old_efr = 0.0;
if ( !data->prev_op_rl_rules->empty() )
{
double trace_increment = ( 1.0 / static_cast<double>( data->prev_op_rl_rules->size() ) );
rl_rule_list::iterator p;
for ( p=data->prev_op_rl_rules->begin(); p!=data->prev_op_rl_rules->end(); p++ )
{
sum_old_ecr += (*p)->rl_ecr;
sum_old_efr += (*p)->rl_efr;
iter = data->eligibility_traces->find( (*p) );
if ( iter != data->eligibility_traces->end() )
{
iter->second += trace_increment;
}
else
{
(*data->eligibility_traces)[ (*p) ] = trace_increment;
}
}
}
// For each prod with a trace, perform update
{
double old_ecr, old_efr;
double delta_ecr, delta_efr;
double new_combined, new_ecr, new_efr;
double delta_t = (data->reward + discount * op_value) - (sum_old_ecr + sum_old_efr);
for ( iter = data->eligibility_traces->begin(); iter != data->eligibility_traces->end(); iter++ )
{
production *prod = iter->first;
// get old vals
old_ecr = prod->rl_ecr;
old_efr = prod->rl_efr;
// Adjust alpha based on decay policy
// Miller 11/14/2011
double adjusted_alpha;
switch (my_agent->rl_params->decay_mode->get_value())
{
case rl_param_container::exponential_decay:
adjusted_alpha = 1.0 / (prod->rl_update_count + 1.0);
break;
case rl_param_container::logarithmic_decay:
adjusted_alpha = 1.0 / (log(prod->rl_update_count + 1.0) + 1.0);
break;
case rl_param_container::delta_bar_delta_decay:
{
// Note that in this case, x_i = 1.0 for all productions that are being updated.
// Those values have been included here for consistency with the algorithm as described in the delta bar delta paper.
prod->rl_delta_bar_delta_beta = prod->rl_delta_bar_delta_beta + theta * delta_t * 1.0 * prod->rl_delta_bar_delta_h;
adjusted_alpha = exp(prod->rl_delta_bar_delta_beta);
double decay_term = 1.0 - adjusted_alpha * 1.0 * 1.0;
if (decay_term < 0.0) decay_term = 0.0;
prod->rl_delta_bar_delta_h = prod->rl_delta_bar_delta_h * decay_term + adjusted_alpha * delta_t * 1.0;
break;
}
case rl_param_container::normal_decay:
default:
adjusted_alpha = alpha;
break;
}
// calculate updates
delta_ecr = ( adjusted_alpha * iter->second * ( data->reward - sum_old_ecr ) );
if ( update_efr )
{
delta_efr = ( adjusted_alpha * iter->second * ( ( discount * op_value ) - sum_old_efr ) );
}
else
{
delta_efr = 0.0;
}
// calculate new vals
new_ecr = ( old_ecr + delta_ecr );
new_efr = ( old_efr + delta_efr );
new_combined = ( new_ecr + new_efr );
// print as necessary
if ( my_agent->sysparams[ TRACE_RL_SYSPARAM ] )
{
std::ostringstream ss;
ss << "RL update " << prod->name->sc.name << " "
<< old_ecr << " " << old_efr << " " << old_ecr + old_efr << " -> "
<< new_ecr << " " << new_efr << " " << new_combined ;
std::string temp_str( ss.str() );
print( my_agent, "%s\n", temp_str.c_str() );
xml_generate_message( my_agent, temp_str.c_str() );
// Log update to file if the log file has been set
std::string log_path = my_agent->rl_params->update_log_path->get_value();
if (!log_path.empty()) {
std::ofstream file(log_path.c_str(), std::ios_base::app);
file << ss.str() << std::endl;
file.close();
}
}
// Change value of rule
prod->action_list->referent = symbol_to_rhs_value( make_float_constant( my_agent, new_combined ) );
prod->rl_update_count += 1;
prod->rl_ecr = new_ecr;
prod->rl_efr = new_efr;
// change documentation
if ( my_agent->rl_params->meta->get_value() == soar_module::on )
{
if ( prod->documentation )
{
free_memory_block_for_string( my_agent, prod->documentation );
}
std::stringstream doc_ss;
const std::vector<std::pair<std::string, param_accessor<double> *> > &documentation_params = my_agent->rl_params->get_documentation_params();
for (std::vector<std::pair<std::string, param_accessor<double> *> >::const_iterator doc_params_it = documentation_params.begin();
doc_params_it != documentation_params.end(); ++doc_params_it) {
doc_ss << doc_params_it->first << "=" << doc_params_it->second->get_param(prod) << ";";
}
prod->documentation = make_memory_block_for_string(my_agent, doc_ss.str().c_str());
/*
std::string rlupdates( "rlupdates=" );
std::string val;
to_string( static_cast< uint64_t >( prod->rl_update_count ), val );
rlupdates.append( val );
prod->documentation = make_memory_block_for_string( my_agent, rlupdates.c_str() );
*/
}
// Change value of preferences generated by current instantiations of this rule
if ( prod->instantiations )
{
for ( instantiation *inst = prod->instantiations; inst; inst = inst->next )
{
for ( preference *pref = inst->preferences_generated; pref; pref = pref->inst_next )
{
symbol_remove_ref( my_agent, pref->referent );
pref->referent = make_float_constant( my_agent, new_combined );
}
}
}
}
}
}
data->gap_age = 0;
data->hrl_age = 0;
data->reward = 0.0;
}
}
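In conventional notation, the body above implements a SARSA-style TD(lambda) update split into expected current reward (ecr) and expected future reward (efr). A sketch of the math as implemented, using the code's variable names (this is a reading of the code, not a separate specification):

\[ \delta_t \;=\; \left( r + \gamma^{k}\,Q(s',a') \right) \;-\; \left( \sum_p \mathrm{ecr}_p + \sum_p \mathrm{efr}_p \right) \]

\[ \Delta\mathrm{ecr}_p \;=\; \alpha\, e_p \left( r - \sum \mathrm{ecr} \right), \qquad \Delta\mathrm{efr}_p \;=\; \alpha\, e_p \left( \gamma^{k}\, Q(s',a') - \sum \mathrm{efr} \right) \]

where r is the accumulated reward (data->reward), gamma the discount_rate, k the effective_age (hrl_age + 1, plus gap_age when temporal-discount is on), Q(s',a') the op_value argument, e_p the eligibility trace of production p, and alpha the learning_rate after any decay-mode adjustment. The sums range over the rules that fired for the previous operator; delta_t itself is consumed only by the delta-bar-delta decay mode.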
void rl_remove_ref ( Symbol *  goal,
production *  prod 
)
inline

Definition at line 238 of file reinforcement_learning.cpp.

References symbol_union::id, rl_data_struct::prev_op_rl_rules, identifier_struct::rl_info, and production_struct::rl_ref_count.

Referenced by rl_remove_refs_for_prod().

{
rl_rule_list *rules = goal->id.rl_info->prev_op_rl_rules;
for ( rl_rule_list::iterator p=rules->begin(); p!=rules->end(); p++ )
{
if ( *p == prod )
{
prod->rl_ref_count--;
}
}
rules->remove( prod );
}
void rl_remove_refs_for_prod ( agent *  my_agent,
production *  prod 
)

Definition at line 290 of file reinforcement_learning.cpp.

References symbol_union::id, identifier_struct::lower_goal, rl_remove_ref(), and agent_struct::top_state.

Referenced by excise_production().

{
for ( Symbol* state = my_agent->top_state; state; state = state->id.lower_goal )
{
state->id.rl_info->eligibility_traces->erase( prod );
rl_remove_ref( state, prod );
}
}
void rl_reset_data ( agent *  my_agent)
void rl_revert_template_id ( agent *  my_agent)

Definition at line 449 of file reinforcement_learning.cpp.

References agent_struct::rl_template_count.

Referenced by rl_build_template_instantiation().

{
my_agent->rl_template_count--;
}
void rl_rule_meta ( agent *  my_agent,
production *  prod 
)

Definition at line 350 of file reinforcement_learning.cpp.

References production_struct::documentation, rl_param_container::get_documentation_params(), soar_module::constant_param< T >::get_value(), rl_param_container::meta, soar_module::on, agent_struct::rl_params, and param_accessor< T >::set_param().

Referenced by parse_production(), and reteload_node_and_children().

{
if ( prod->documentation && ( my_agent->rl_params->meta->get_value() == soar_module::on ) )
{
std::string doc( prod->documentation );
const std::vector<std::pair<std::string, param_accessor<double> *> > &documentation_params = my_agent->rl_params->get_documentation_params();
for (std::vector<std::pair<std::string, param_accessor<double> *> >::const_iterator doc_params_it = documentation_params.begin();
doc_params_it != documentation_params.end(); ++doc_params_it) {
const std::string &param_name = doc_params_it->first;
param_accessor<double> *accessor = doc_params_it->second;
std::stringstream param_name_ss;
param_name_ss << param_name << "=";
std::string search_term = param_name_ss.str();
size_t begin_index = doc.find(search_term);
if (begin_index == std::string::npos) continue;
begin_index += search_term.size();
size_t end_index = doc.find(";", begin_index);
if (end_index == std::string::npos) continue;
std::string param_value_str = doc.substr(begin_index, end_index - begin_index);
accessor->set_param(prod, param_value_str);
}
/*
std::string search( "rlupdates=" );
if ( doc.length() > search.length() )
{
if ( doc.substr( 0, search.length() ).compare( search ) == 0 )
{
uint64_t val;
from_string( val, doc.substr( search.length() ) );
prod->rl_update_count = static_cast< double >( val );
}
}
*/
}
}
void rl_store_data ( agent *  my_agent,
Symbol *  goal,
preference *  cand 
)

Definition at line 790 of file reinforcement_learning.cpp.

References rl_data_struct::gap_age, soar_module::constant_param< T >::get_value(), symbol_union::id, identifier_struct::name_letter, identifier_struct::name_number, preference_struct::next, NUMERIC_INDIFFERENT_PREFERENCE_TYPE, preference_struct::numeric_value, soar_module::on, identifier_struct::operator_slot, slot_struct::preferences, rl_data_struct::prev_op_rl_rules, rl_data_struct::previous_q, print(), rl_add_ref(), rl_clear_refs(), identifier_struct::rl_info, agent_struct::rl_params, agent_struct::sysparams, rl_param_container::temporal_extension, TRACE_RL_SYSPARAM, preference_struct::value, and xml_generate_warning().

Referenced by decide_context_slot().

{
rl_data *data = goal->id.rl_info;
Symbol *op = cand->value;
bool using_gaps = ( my_agent->rl_params->temporal_extension->get_value() == soar_module::on );
// Make list of just-fired prods
unsigned int just_fired = 0;
for ( preference *pref = goal->id.operator_slot->preferences[ NUMERIC_INDIFFERENT_PREFERENCE_TYPE ]; pref; pref = pref->next )
{
if ( ( op == pref->value ) && pref->inst->prod->rl_rule )
{
if ( ( just_fired == 0 ) && !data->prev_op_rl_rules->empty() )
{
rl_clear_refs( goal );
}
rl_add_ref( goal, pref->inst->prod );
just_fired++;
}
}
if ( just_fired )
{
data->previous_q = cand->numeric_value;
}
else
{
if ( my_agent->sysparams[ TRACE_RL_SYSPARAM ] && using_gaps &&
( data->gap_age == 0 ) && !data->prev_op_rl_rules->empty() )
{
char buf[256];
SNPRINTF( buf, 254, "gap started (%c%llu)", goal->id.name_letter, static_cast<long long unsigned>(goal->id.name_number) );
print( my_agent, buf );
xml_generate_warning( my_agent, buf );
}
if ( !using_gaps )
{
if ( !data->prev_op_rl_rules->empty() )
{
rl_clear_refs( goal );
}
data->previous_q = cand->numeric_value;
}
else
{
if ( !data->prev_op_rl_rules->empty() )
{
data->gap_age++;
}
}
}
}
void rl_tabulate_reward_value_for_goal ( agent *  my_agent,
Symbol *  goal 
)

Definition at line 723 of file reinforcement_learning.cpp.

References agent_struct::bottom_goal, rl_param_container::discount_rate, find_slot(), FLOAT_CONSTANT_SYMBOL_TYPE, rl_data_struct::gap_age, get_number_from_symbol(), soar_module::primitive_param< T >::get_value(), soar_module::constant_param< T >::get_value(), soar_module::primitive_stat< T >::get_value(), rl_stat_container::global_reward, rl_data_struct::hrl_age, rl_param_container::hrl_discount, symbol_union::id, IDENTIFIER_SYMBOL_TYPE, INT_CONSTANT_SYMBOL_TYPE, wme_struct::next, soar_module::on, rl_data_struct::prev_op_rl_rules, rl_data_struct::reward, identifier_struct::reward_header, identifier_struct::rl_info, agent_struct::rl_params, agent_struct::rl_stats, agent_struct::rl_sym_reward, agent_struct::rl_sym_value, soar_module::primitive_stat< T >::set_value(), rl_param_container::temporal_discount, rl_stat_container::total_reward, wme_struct::value, and slot_struct::wmes.

Referenced by do_one_top_level_phase(), remove_existing_context_and_descendents(), and rl_tabulate_reward_values().

{
rl_data *data = goal->id.rl_info;
if ( !data->prev_op_rl_rules->empty() )
{
slot *s = find_slot( goal->id.reward_header, my_agent->rl_sym_reward );
slot *t;
wme *w, *x;
double reward = 0.0;
double discount_rate = my_agent->rl_params->discount_rate->get_value();
if ( s )
{
for ( w=s->wmes; w; w=w->next )
{
if ( w->value->common.symbol_type == IDENTIFIER_SYMBOL_TYPE )
{
t = find_slot( w->value, my_agent->rl_sym_value );
if ( t )
{
for ( x=t->wmes; x; x=x->next )
{
if ( ( x->value->common.symbol_type == FLOAT_CONSTANT_SYMBOL_TYPE ) || ( x->value->common.symbol_type == INT_CONSTANT_SYMBOL_TYPE ) )
{
reward += get_number_from_symbol( x->value );
}
}
}
}
}
// if temporal_discount is off, don't discount for gaps
unsigned int effective_age = data->hrl_age;
if ( my_agent->rl_params->temporal_discount->get_value() == soar_module::on )
{
effective_age += data->gap_age;
}
data->reward += ( reward * pow( discount_rate, static_cast< double >( effective_age ) ) );
}
// update stats
double global_reward = my_agent->rl_stats->global_reward->get_value();
my_agent->rl_stats->total_reward->set_value( reward );
my_agent->rl_stats->global_reward->set_value( global_reward + reward );
if ( ( goal != my_agent->bottom_goal ) && ( my_agent->rl_params->hrl_discount->get_value() == soar_module::on ) )
{
data->hrl_age++;
}
}
}
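For orientation, the structure this function walks is the standard reward link, where rl_sym_reward and rl_sym_value correspond to the ^reward and ^value attributes; a hedged working-memory illustration (identifiers hypothetical):

(S1 ^reward-link R1)
(R1 ^reward Q2)
(Q2 ^value 13.5)

Every numeric ^value beneath every ^reward augmentation is summed into reward for the current cycle, then discounted by effective_age before being added to data->reward.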
void rl_tabulate_reward_values ( agent *  my_agent)
void rl_update_template_tracking ( agent *  my_agent,
const char *  rule_name 
)

Definition at line 434 of file reinforcement_learning.cpp.

References rl_get_template_id(), and agent_struct::rl_template_count.

Referenced by make_production().

{
int new_id = rl_get_template_id( rule_name );
if ( ( new_id != -1 ) && ( new_id > my_agent->rl_template_count ) )
my_agent->rl_template_count = ( new_id + 1 );
}
bool rl_valid_rule ( production *  prod)

Definition at line 331 of file reinforcement_learning.cpp.

References production_struct::action_list, MAKE_ACTION, action_struct::next, and NUMERIC_INDIFFERENT_PREFERENCE_TYPE.

Referenced by make_production(), and reteload_node_and_children().

{
bool numeric_pref = false;
int num_actions = 0;
for ( action *a = prod->action_list; a; a = a->next )
{
num_actions++;
if ( a->type == MAKE_ACTION )
{
if ( a->preference_type == NUMERIC_INDIFFERENT_PREFERENCE_TYPE )
numeric_pref = true;
}
}
return ( numeric_pref && ( num_actions == 1 ) );
}
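Put differently, a rule is RL-capable only when its right-hand side is exactly one action making a numeric-indifferent preference. A hedged Soar-syntax illustration (rule name and tests hypothetical):

sp {my*rl*rule
   (state <s> ^operator <o> +)
   (<o> ^name move)
-->
   (<s> ^operator <o> = 0.3)}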
bool rl_valid_template ( production *  prod)

Definition at line 304 of file reinforcement_learning.cpp.

References production_struct::action_list, BINARY_INDIFFERENT_PREFERENCE_TYPE, identifier_struct::common_symbol_info, symbol_union::id, MAKE_ACTION, action_struct::next, NUMERIC_INDIFFERENT_PREFERENCE_TYPE, rhs_value_is_symbol(), rhs_value_to_symbol(), symbol_common_data_struct::symbol_type, and VARIABLE_SYMBOL_TYPE.

Referenced by parse_production().

{
bool numeric_pref = false;
bool var_pref = false;
int num_actions = 0;
for ( action *a = prod->action_list; a; a = a->next )
{
num_actions++;
if ( a->type == MAKE_ACTION )
{
if ( a->preference_type == NUMERIC_INDIFFERENT_PREFERENCE_TYPE )
{
numeric_pref = true;
}
else if ( a->preference_type == BINARY_INDIFFERENT_PREFERENCE_TYPE )
{
var_pref = true;
}
}
}
return ( ( num_actions == 1 ) && ( numeric_pref || var_pref ) );
}
void rl_watkins_clear ( agent *  ,
Symbol *  goal 
)
void variablize_condition_list ( agent *  thisAgent,
condition *  cond 
)
void variablize_nots_and_insert_into_conditions ( agent *  thisAgent,
not_struct *  nots,
condition *  conds 
)

Definition at line 576 of file chunk.cpp.

References abort_with_fatal_error(), add_new_test_to_test(), three_field_tests_struct::attr_test, BUFFER_MSG_SIZE, agent_struct::complex_test_pool, complex_test_struct::data, condition_struct::data, FALSE, symbol_union::id, three_field_tests_struct::id_test, make_test_from_complex_test(), not_struct::next, condition_struct::next, NIL, NOT_EQUAL_TEST, POSITIVE_CONDITION, complex_test_struct::test_info_union::referent, not_struct::s1, not_struct::s2, symbol_add_ref(), test_includes_equality_test_for_symbol(), condition_struct::condition_main_data_union::tests, TRUE, complex_test_struct::type, condition_struct::type, three_field_tests_struct::value_test, and identifier_struct::variablization.

Referenced by chunk_instantiation(), and rl_build_template_instantiation().

{
not_struct *n;
Symbol *var1, *var2;
test t;
complex_test *ct;
condition *c;
Bool added_it;
for (n=nots; n!=NIL; n=n->next) {
var1 = n->s1->id.variablization;
var2 = n->s2->id.variablization;
/* --- find where var1 is bound, and add "<> var2" to that test --- */
allocate_with_pool (thisAgent, &thisAgent->complex_test_pool, &ct);
ct->type = NOT_EQUAL_TEST;
ct->data.referent = var2;
t = make_test_from_complex_test (ct);
symbol_add_ref (var2);
added_it = FALSE;
for (c=conds; c!=NIL; c=c->next) {
if (c->type != POSITIVE_CONDITION) continue;
if (test_includes_equality_test_for_symbol (c->data.tests.id_test, var1)) {
add_new_test_to_test (thisAgent, &(c->data.tests.id_test), t);
added_it = TRUE;
break;
}
if (test_includes_equality_test_for_symbol (c->data.tests.attr_test, var1)) {
add_new_test_to_test (thisAgent, &(c->data.tests.attr_test), t);
added_it = TRUE;
break;
}
if (test_includes_equality_test_for_symbol (c->data.tests.value_test, var1)) {
add_new_test_to_test (thisAgent, &(c->data.tests.value_test), t);
added_it = TRUE;
break;
}
}
if (!added_it) {
char msg[BUFFER_MSG_SIZE];
strncpy (msg,"chunk.c: Internal error: couldn't add Not test to chunk\n", BUFFER_MSG_SIZE);
msg[BUFFER_MSG_SIZE - 1] = 0; /* ensure null termination */
abort_with_fatal_error(thisAgent, msg);
}
} /* end of for n=nots */
}
void variablize_symbol ( agent *  thisAgent,
Symbol **  sym 
)

Definition at line 179 of file chunk.cpp.

References generate_new_variable(), symbol_union::id, IDENTIFIER_SYMBOL_TYPE, NIL, symbol_add_ref(), symbol_remove_ref(), identifier_struct::tc_num, identifier_struct::variablization, and agent_struct::variablization_tc.

Referenced by copy_and_variablize_result_list(), rl_make_simple_action(), and variablize_test().

{
char prefix[2];
Symbol *var;
if ((*sym)->common.symbol_type!=IDENTIFIER_SYMBOL_TYPE) return; // only variablize identifiers
if ((*sym)->id.smem_lti != NIL) // don't variablize lti (long term identifiers)
{
(*sym)->id.tc_num = thisAgent->variablization_tc;
(*sym)->id.variablization = (*sym);
return;
}
if ((*sym)->id.tc_num == thisAgent->variablization_tc) {
/* --- it's already been variablized, so use the existing variable --- */
var = (*sym)->id.variablization;
symbol_remove_ref (thisAgent, *sym);
*sym = var;
return;
}
/* --- need to create a new variable --- */
(*sym)->id.tc_num = thisAgent->variablization_tc;
prefix[0] = static_cast<char>(tolower((*sym)->id.name_letter));
prefix[1] = 0;
var = generate_new_variable (thisAgent, prefix);
(*sym)->id.variablization = var;
symbol_remove_ref (thisAgent, *sym);
*sym = var;
}
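Illustrative behavior of the pass above, as a hedged sketch (symbol names hypothetical):

// Within one variablization_tc:
//   first encounter of identifier O3    ->  fresh variable <o> (cached in O3->id.variablization)
//   later encounters of O3 in the pass  ->  reuse <o>
//   long-term identifier (smem_lti set) ->  left in place; acts as its own variablization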