Soar Kernel  9.3.2 08-06-12
reinforcement_learning.h File Reference
#include <map>
#include <string>
#include <list>
#include <vector>
#include "soar_module.h"
#include "chunk.h"
#include "production.h"

Go to the source code of this file.

Data Structures

class  param_accessor< T >
class  rl_apoptosis_param
class  rl_apoptosis_predicate< T >
class  rl_apoptosis_thresh_param
struct  rl_data_struct
class  rl_dbd_h_accessor
class  rl_learning_param
class  rl_param_container
class  rl_stat_container
class  rl_updates_accessor

Macros

#define OP_NO_CHANGE_IMPASSE_TYPE   -2
#define STATE_NO_CHANGE_IMPASSE_TYPE   -1

Typedefs

typedef struct rl_data_struct rl_data
typedef std::map< production*, double, std::less< production* >, soar_module::soar_memory_pool_allocator< std::pair< production*, double > > > rl_et_map
typedef soar_module::bla_object_memory< production, 10, 50 > rl_production_memory
typedef std::list< production*, soar_module::soar_memory_pool_allocator< production* > > rl_rule_list
typedef std::map< Symbol*, Symbol* > rl_symbol_map
typedef std::set< rl_symbol_map > rl_symbol_map_set

Functions

void rl_add_goal_or_impasse_tests_to_conds (agent *my_agent, condition *all_conds)
Symbol * rl_build_template_instantiation (agent *my_agent, instantiation *my_template_instance, struct token_struct *tok, wme *w)
void rl_clear_refs (Symbol *goal)
bool rl_enabled (agent *my_agent)
int rl_get_template_id (const char *prod_name)
void rl_initialize_template_tracking (agent *my_agent)
action * rl_make_simple_action (agent *my_agent, Symbol *id_sym, Symbol *attr_sym, Symbol *val_sym, Symbol *ref_sym)
int rl_next_template_id (agent *my_agent)
void rl_perform_update (agent *my_agent, double op_value, bool op_rl, Symbol *goal, bool update_efr=true)
void rl_remove_refs_for_prod (agent *my_agent, production *prod)
void rl_revert_template_id (agent *my_agent)
void rl_rule_meta (agent *my_agent, production *prod)
void rl_store_data (agent *my_agent, Symbol *goal, preference *cand)
void rl_tabulate_reward_value_for_goal (agent *my_agent, Symbol *goal)
void rl_tabulate_reward_values (agent *my_agent)
void rl_update_template_tracking (agent *my_agent, const char *rule_name)
bool rl_valid_rule (production *prod)
bool rl_valid_template (production *prod)
void rl_watkins_clear (agent *my_agent, Symbol *goal)

Macro Definition Documentation

#define OP_NO_CHANGE_IMPASSE_TYPE   -2

Definition at line 33 of file reinforcement_learning.h.

#define STATE_NO_CHANGE_IMPASSE_TYPE   -1

Definition at line 32 of file reinforcement_learning.h.

Typedef Documentation

typedef struct rl_data_struct rl_data

typedef std::map< production*, double, std::less< production* >, soar_module::soar_memory_pool_allocator< std::pair< production*, double > > > rl_et_map

Definition at line 170 of file reinforcement_learning.h.

typedef soar_module::bla_object_memory< production, 10, 50 > rl_production_memory

Definition at line 198 of file reinforcement_learning.h.

typedef std::list< production*, soar_module::soar_memory_pool_allocator< production* > > rl_rule_list

Definition at line 177 of file reinforcement_learning.h.

typedef std::map< Symbol*, Symbol* > rl_symbol_map

Definition at line 194 of file reinforcement_learning.h.

typedef std::set< rl_symbol_map > rl_symbol_map_set

Definition at line 195 of file reinforcement_learning.h.

Function Documentation

void rl_add_goal_or_impasse_tests_to_conds ( agent *my_agent, condition *all_conds )

Definition at line 692 of file reinforcement_learning.cpp.

References add_new_test_to_test(), agent_struct::complex_test_pool, get_new_tc_number(), GOAL_ID_TEST, wme_struct::id, symbol_union::id, IMPASSE_ID_TEST, identifier_struct::isa_goal, identifier_struct::isa_impasse, make_test_from_complex_test(), NIL, POSITIVE_CONDITION, referent_of_equality_test(), identifier_struct::tc_num, and complex_test_struct::type.

Referenced by rl_build_template_instantiation().

{
// mark each id as we add a test for it, so we don't add a test for the same id in two different places
Symbol *id;
test t;
complex_test *ct;
tc_number tc = get_new_tc_number( my_agent );
for ( condition *cond = all_conds; cond != NIL; cond = cond->next )
{
if ( cond->type != POSITIVE_CONDITION )
continue;
id = referent_of_equality_test( cond->data.tests.id_test );
if ( ( id->id.isa_goal || id->id.isa_impasse ) && ( id->id.tc_num != tc ) )
{
allocate_with_pool( my_agent, &my_agent->complex_test_pool, &ct );
ct->type = static_cast<byte>( ( id->id.isa_goal )?( GOAL_ID_TEST ):( IMPASSE_ID_TEST ) );
t = make_test_from_complex_test( ct );
add_new_test_to_test( my_agent, &( cond->data.tests.id_test ), t );
id->id.tc_num = tc;
}
}
}
Symbol * rl_build_template_instantiation ( agent *my_agent, instantiation *my_template_instance, struct token_struct *tok, wme *w )

Definition at line 540 of file reinforcement_learning.cpp.

References production_struct::action_list, add_production_to_rete(), wme_struct::attr, action_struct::attr, copy_condition_list(), deallocate_condition_list(), DUPLICATE_PRODUCTION, excise_production(), FALSE, symbol_union::fc, find_sym_constant(), first_letter_from_symbol(), FLOAT_CONSTANT_SYMBOL_TYPE, get_new_tc_number(), symbol_union::ic, wme_struct::id, action_struct::id, symbol_union::id, instantiate_rhs_value(), INT_CONSTANT_SYMBOL_TYPE, identifier_struct::level, make_production(), make_sym_constant(), production_struct::name, sym_constant_struct::name, NIL, instantiation_struct::nots, NUMERIC_INDIFFERENT_PREFERENCE_TYPE, production_struct::p_node, p_node_to_conditions_and_nots(), action_struct::preference_type, instantiation_struct::prod, action_struct::referent, reset_variable_generator(), rl_add_goal_or_impasse_tests_to_conds(), production_struct::rl_ecr, production_struct::rl_efr, rl_get_template_constants(), rl_make_simple_action(), rl_next_template_id(), rl_revert_template_id(), production_struct::rl_template_conds, production_struct::rl_template_instantiations, symbol_union::sc, symbol_remove_ref(), instantiation_struct::top_of_instantiated_conditions, TRUE, USER_PRODUCTION_TYPE, wme_struct::value, int_constant_struct::value, float_constant_struct::value, action_struct::value, agent_struct::variablization_tc, variablize_condition_list(), and variablize_nots_and_insert_into_conditions().

Referenced by create_instantiation().

{
Symbol* return_val = NULL;
// initialize production conditions
if ( my_template_instance->prod->rl_template_conds == NIL )
{
not_struct* nots;
condition* c_top;
condition* c_bottom;
p_node_to_conditions_and_nots( my_agent, my_template_instance->prod->p_node, NIL, NIL, &( c_top ), &( c_bottom ), &( nots ), NIL );
my_template_instance->prod->rl_template_conds = c_top;
}
// initialize production instantiation set
if ( my_template_instance->prod->rl_template_instantiations == NIL )
{
my_template_instance->prod->rl_template_instantiations = new rl_symbol_map_set;
}
// get constants
rl_symbol_map constant_map;
{
rl_get_template_constants( my_template_instance->prod->rl_template_conds, my_template_instance->top_of_instantiated_conditions, &( constant_map ) );
}
// try to insert into instantiation set
//if ( !constant_map.empty() )
{
std::pair< rl_symbol_map_set::iterator, bool > ins_result = my_template_instance->prod->rl_template_instantiations->insert( constant_map );
if ( ins_result.second )
{
Symbol *id, *attr, *value, *referent;
production *my_template = my_template_instance->prod;
action *my_action = my_template->action_list;
char first_letter;
double init_value = 0;
condition *cond_top, *cond_bottom;
// make unique production name
Symbol *new_name_symbol;
std::string new_name = "";
std::string empty_string = "";
std::string temp_id;
int new_id;
do
{
new_id = rl_next_template_id( my_agent );
to_string( new_id, temp_id );
new_name = ( "rl*" + empty_string + my_template->name->sc.name + "*" + temp_id );
} while ( find_sym_constant( my_agent, new_name.c_str() ) != NIL );
new_name_symbol = make_sym_constant( my_agent, new_name.c_str() );
// prep conditions
copy_condition_list( my_agent, my_template_instance->top_of_instantiated_conditions, &cond_top, &cond_bottom );
reset_variable_generator( my_agent, cond_top, NIL );
my_agent->variablization_tc = get_new_tc_number( my_agent );
variablize_condition_list( my_agent, cond_top );
variablize_nots_and_insert_into_conditions( my_agent, my_template_instance->nots, cond_top );
// get the preference value
id = instantiate_rhs_value( my_agent, my_action->id, -1, 's', tok, w );
attr = instantiate_rhs_value( my_agent, my_action->attr, id->id.level, 'a', tok, w );
first_letter = first_letter_from_symbol( attr );
value = instantiate_rhs_value( my_agent, my_action->value, id->id.level, first_letter, tok, w );
referent = instantiate_rhs_value( my_agent, my_action->referent, id->id.level, first_letter, tok, w );
// clean up after yourself :)
symbol_remove_ref( my_agent, id );
symbol_remove_ref( my_agent, attr );
symbol_remove_ref( my_agent, value );
symbol_remove_ref( my_agent, referent );
// make new action list
action *new_action = rl_make_simple_action( my_agent, id, attr, value, referent );
new_action->preference_type = NUMERIC_INDIFFERENT_PREFERENCE_TYPE;
// make new production
production *new_production = make_production( my_agent, USER_PRODUCTION_TYPE, new_name_symbol, &cond_top, &cond_bottom, &new_action, false );
// set initial expected reward values
{
if ( referent->common.symbol_type == INT_CONSTANT_SYMBOL_TYPE )
{
init_value = static_cast< double >( referent->ic.value );
}
else if ( referent->common.symbol_type == FLOAT_CONSTANT_SYMBOL_TYPE )
{
init_value = referent->fc.value;
}
new_production->rl_ecr = 0.0;
new_production->rl_efr = init_value;
}
// attempt to add to rete, remove if duplicate
if ( add_production_to_rete( my_agent, new_production, cond_top, NULL, FALSE, TRUE ) == DUPLICATE_PRODUCTION )
{
excise_production( my_agent, new_production, false );
rl_revert_template_id( my_agent );
new_name_symbol = NULL;
}
deallocate_condition_list( my_agent, cond_top );
return_val = new_name_symbol;
}
}
return return_val;
}
void rl_clear_refs ( Symbol *goal )

Definition at line 253 of file reinforcement_learning.cpp.

References symbol_union::id, rl_data_struct::prev_op_rl_rules, and identifier_struct::rl_info.

Referenced by remove_existing_context_and_descendents(), rl_reset_data(), and rl_store_data().

{
rl_rule_list *rules = goal->id.rl_info->prev_op_rl_rules;
for ( rl_rule_list::iterator p=rules->begin(); p!=rules->end(); p++ )
{
(*p)->rl_ref_count--;
}
rules->clear();
}
bool rl_enabled ( agent *my_agent )
int rl_get_template_id ( const char *  prod_name)

Definition at line 395 of file reinforcement_learning.cpp.

References wme_struct::id, and is_whole_number().

Referenced by rl_update_template_tracking().

{
std::string temp = prod_name;
// has to be at least "rl*a*#" (where a is a single letter/number/etc)
if ( temp.length() < 6 )
return -1;
// check first three letters are "rl*"
if ( temp.compare( 0, 3, "rl*" ) )
return -1;
// find last * to isolate id
std::string::size_type last_star = temp.find_last_of( '*' );
if ( last_star == std::string::npos )
return -1;
// make sure there's something left after last_star
if ( last_star == ( temp.length() - 1 ) )
return -1;
// make sure id is a valid natural number
std::string id_str = temp.substr( last_star + 1 );
if ( !is_whole_number( id_str ) )
return -1;
// convert id
int id;
from_string( id, id_str );
return id;
}
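For reference, a few hypothetical rule names and the IDs the parsing above would return (illustrative only; these names are not taken from the kernel):

// Illustrative behaviour of rl_get_template_id (hypothetical rule names):
//   rl_get_template_id( "rl*my-template*7" )   returns 7
//   rl_get_template_id( "my-template*7" )      returns -1  (missing the "rl*" prefix)
//   rl_get_template_id( "rl*my-template*" )    returns -1  (nothing after the last '*')
//   rl_get_template_id( "rl*my-template*3a" )  returns -1  (id is not a whole number)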
void rl_initialize_template_tracking ( agent *my_agent )

Definition at line 428 of file reinforcement_learning.cpp.

References agent_struct::rl_template_count.

Referenced by create_soar_agent().

{
my_agent->rl_template_count = 1;
}
action * rl_make_simple_action ( agent *my_agent, Symbol *id_sym, Symbol *attr_sym, Symbol *val_sym, Symbol *ref_sym )

Definition at line 656 of file reinforcement_learning.cpp.

References agent_struct::action_pool, action_struct::attr, action_struct::id, MAKE_ACTION, action_struct::next, NIL, action_struct::referent, symbol_add_ref(), symbol_to_rhs_value(), action_struct::type, action_struct::value, and variablize_symbol().

Referenced by rl_build_template_instantiation().

{
action *rhs;
Symbol *temp;
allocate_with_pool( my_agent, &my_agent->action_pool, &rhs );
rhs->next = NIL;
rhs->type = MAKE_ACTION;
// id
temp = id_sym;
symbol_add_ref( temp );
variablize_symbol( my_agent, &temp );
rhs->id = symbol_to_rhs_value( temp );
// attribute
temp = attr_sym;
symbol_add_ref( temp );
variablize_symbol( my_agent, &temp );
rhs->attr = symbol_to_rhs_value( temp );
// value
temp = val_sym;
symbol_add_ref( temp );
variablize_symbol( my_agent, &temp );
rhs->value = symbol_to_rhs_value( temp );
// referent
temp = ref_sym;
symbol_add_ref( temp );
variablize_symbol( my_agent, &temp );
rhs->referent = symbol_to_rhs_value( temp );
return rhs;
}
int rl_next_template_id ( agent *my_agent )

Definition at line 443 of file reinforcement_learning.cpp.

References agent_struct::rl_template_count.

Referenced by rl_build_template_instantiation().

{
return (my_agent->rl_template_count++);
}
void rl_perform_update ( agent *my_agent, double op_value, bool op_rl, Symbol *goal, bool update_efr = true )

Definition at line 849 of file reinforcement_learning.cpp.

References production_struct::action_list, rl_param_container::decay_mode, rl_param_container::delta_bar_delta_decay, rl_param_container::discount_rate, production_struct::documentation, rl_data_struct::eligibility_traces, rl_param_container::et_decay_rate, rl_param_container::et_tolerance, rl_param_container::exponential_decay, free_memory_block_for_string(), rl_data_struct::gap_age, rl_param_container::get_documentation_params(), soar_module::primitive_param< T >::get_value(), soar_module::string_param::get_value(), soar_module::constant_param< T >::get_value(), rl_data_struct::hrl_age, symbol_union::id, preference_struct::inst_next, production_struct::instantiations, rl_param_container::learning_rate, rl_param_container::logarithmic_decay, make_float_constant(), make_memory_block_for_string(), rl_param_container::meta, rl_param_container::meta_learning_rate, production_struct::name, sym_constant_struct::name, identifier_struct::name_letter, identifier_struct::name_number, instantiation_struct::next, rl_param_container::normal_decay, soar_module::on, rl_data_struct::prev_op_rl_rules, print(), action_struct::referent, rl_data_struct::reward, rhs_value_to_symbol(), production_struct::rl_delta_bar_delta_beta, production_struct::rl_delta_bar_delta_h, production_struct::rl_ecr, production_struct::rl_efr, identifier_struct::rl_info, agent_struct::rl_params, production_struct::rl_update_count, symbol_union::sc, symbol_remove_ref(), symbol_to_rhs_value(), agent_struct::sysparams, rl_param_container::temporal_discount, rl_param_container::temporal_extension, TRACE_RL_SYSPARAM, rl_param_container::update_log_path, xml_generate_message(), and xml_generate_warning().

Referenced by do_one_top_level_phase(), exploration_choose_according_to_policy(), remove_existing_context_and_descendents(), require_preference_semantics(), and run_preference_semantics().

{
bool using_gaps = ( my_agent->rl_params->temporal_extension->get_value() == soar_module::on );
if ( !using_gaps || op_rl )
{
rl_data *data = goal->id.rl_info;
if ( !data->prev_op_rl_rules->empty() )
{
rl_et_map::iterator iter;
double alpha = my_agent->rl_params->learning_rate->get_value();
double lambda = my_agent->rl_params->et_decay_rate->get_value();
double gamma = my_agent->rl_params->discount_rate->get_value();
double tolerance = my_agent->rl_params->et_tolerance->get_value();
double theta = my_agent->rl_params->meta_learning_rate->get_value();
// if temporal_discount is off, don't discount for gaps
unsigned int effective_age = data->hrl_age + 1;
if ( my_agent->rl_params->temporal_discount->get_value() == soar_module::on )
{
effective_age += data->gap_age;
}
double discount = pow( gamma, static_cast< double >( effective_age ) );
// notify of gap closure
if ( data->gap_age && using_gaps && my_agent->sysparams[ TRACE_RL_SYSPARAM ] )
{
char buf[256];
SNPRINTF( buf, 254, "gap ended (%c%llu)", goal->id.name_letter, static_cast<long long unsigned>(goal->id.name_number) );
print( my_agent, buf );
xml_generate_warning( my_agent, buf );
}
// Iterate through eligibility_traces, decay traces. If less than TOLERANCE, remove from map.
if ( lambda == 0 )
{
if ( !data->eligibility_traces->empty() )
{
data->eligibility_traces->clear();
}
}
else
{
for ( iter = data->eligibility_traces->begin(); iter != data->eligibility_traces->end(); )
{
iter->second *= lambda;
iter->second *= discount;
if ( iter->second < tolerance )
{
data->eligibility_traces->erase( iter++ );
}
else
{
++iter;
}
}
}
// Update trace for just fired prods
double sum_old_ecr = 0.0;
double sum_old_efr = 0.0;
if ( !data->prev_op_rl_rules->empty() )
{
double trace_increment = ( 1.0 / static_cast<double>( data->prev_op_rl_rules->size() ) );
rl_rule_list::iterator p;
for ( p=data->prev_op_rl_rules->begin(); p!=data->prev_op_rl_rules->end(); p++ )
{
sum_old_ecr += (*p)->rl_ecr;
sum_old_efr += (*p)->rl_efr;
iter = data->eligibility_traces->find( (*p) );
if ( iter != data->eligibility_traces->end() )
{
iter->second += trace_increment;
}
else
{
(*data->eligibility_traces)[ (*p) ] = trace_increment;
}
}
}
// For each prod with a trace, perform update
{
double old_ecr, old_efr;
double delta_ecr, delta_efr;
double new_combined, new_ecr, new_efr;
double delta_t = (data->reward + discount * op_value) - (sum_old_ecr + sum_old_efr);
for ( iter = data->eligibility_traces->begin(); iter != data->eligibility_traces->end(); iter++ )
{
production *prod = iter->first;
// get old vals
old_ecr = prod->rl_ecr;
old_efr = prod->rl_efr;
// Adjust alpha based on decay policy
// Miller 11/14/2011
double adjusted_alpha;
switch (my_agent->rl_params->decay_mode->get_value())
{
case rl_param_container::exponential_decay:
adjusted_alpha = 1.0 / (prod->rl_update_count + 1.0);
break;
case rl_param_container::logarithmic_decay:
adjusted_alpha = 1.0 / (log(prod->rl_update_count + 1.0) + 1.0);
break;
case rl_param_container::delta_bar_delta_decay:
{
// Note that in this case, x_i = 1.0 for all productions that are being updated.
// Those values have been included here for consistency with the algorithm as described in the delta bar delta paper.
prod->rl_delta_bar_delta_beta = prod->rl_delta_bar_delta_beta + theta * delta_t * 1.0 * prod->rl_delta_bar_delta_h;
adjusted_alpha = exp(prod->rl_delta_bar_delta_beta);
double decay_term = 1.0 - adjusted_alpha * 1.0 * 1.0;
if (decay_term < 0.0) decay_term = 0.0;
prod->rl_delta_bar_delta_h = prod->rl_delta_bar_delta_h * decay_term + adjusted_alpha * delta_t * 1.0;
break;
}
case rl_param_container::normal_decay:
default:
adjusted_alpha = alpha;
break;
}
// calculate updates
delta_ecr = ( adjusted_alpha * iter->second * ( data->reward - sum_old_ecr ) );
if ( update_efr )
{
delta_efr = ( adjusted_alpha * iter->second * ( ( discount * op_value ) - sum_old_efr ) );
}
else
{
delta_efr = 0.0;
}
// calculate new vals
new_ecr = ( old_ecr + delta_ecr );
new_efr = ( old_efr + delta_efr );
new_combined = ( new_ecr + new_efr );
// print as necessary
if ( my_agent->sysparams[ TRACE_RL_SYSPARAM ] )
{
std::ostringstream ss;
ss << "RL update " << prod->name->sc.name << " "
<< old_ecr << " " << old_efr << " " << old_ecr + old_efr << " -> "
<< new_ecr << " " << new_efr << " " << new_combined ;
std::string temp_str( ss.str() );
print( my_agent, "%s\n", temp_str.c_str() );
xml_generate_message( my_agent, temp_str.c_str() );
// Log update to file if the log file has been set
std::string log_path = my_agent->rl_params->update_log_path->get_value();
if (!log_path.empty()) {
std::ofstream file(log_path.c_str(), std::ios_base::app);
file << ss.str() << std::endl;
file.close();
}
}
// Change value of rule
prod->action_list->referent = symbol_to_rhs_value( make_float_constant( my_agent, new_combined ) );
prod->rl_update_count += 1;
prod->rl_ecr = new_ecr;
prod->rl_efr = new_efr;
// change documentation
if ( my_agent->rl_params->meta->get_value() == soar_module::on )
{
if ( prod->documentation )
{
free_memory_block_for_string( my_agent, prod->documentation );
}
std::stringstream doc_ss;
const std::vector<std::pair<std::string, param_accessor<double> *> > &documentation_params = my_agent->rl_params->get_documentation_params();
for (std::vector<std::pair<std::string, param_accessor<double> *> >::const_iterator doc_params_it = documentation_params.begin();
doc_params_it != documentation_params.end(); ++doc_params_it) {
doc_ss << doc_params_it->first << "=" << doc_params_it->second->get_param(prod) << ";";
}
prod->documentation = make_memory_block_for_string(my_agent, doc_ss.str().c_str());
/*
std::string rlupdates( "rlupdates=" );
std::string val;
to_string( static_cast< uint64_t >( prod->rl_update_count ), val );
rlupdates.append( val );
prod->documentation = make_memory_block_for_string( my_agent, rlupdates.c_str() );
*/
}
// Change value of preferences generated by current instantiations of this rule
if ( prod->instantiations )
{
for ( instantiation *inst = prod->instantiations; inst; inst = inst->next )
{
for ( preference *pref = inst->preferences_generated; pref; pref = pref->inst_next )
{
symbol_remove_ref( my_agent, pref->referent );
pref->referent = make_float_constant( my_agent, new_combined );
}
}
}
}
}
}
data->gap_age = 0;
data->hrl_age = 0;
data->reward = 0.0;
}
}
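As a minimal standalone sketch (not kernel code; all numeric values are hypothetical), the per-production arithmetic performed in the update loop above can be isolated as follows:

// Standalone sketch of the core TD update above (hypothetical values).
#include <cstdio>

int main()
{
    double alpha = 0.3;                    // learning-rate parameter
    double discount = 0.9;                 // gamma raised to the effective age
    double trace = 1.0;                    // eligibility trace of one just-fired rule
    double reward = 1.0;                   // data->reward
    double op_value = 2.0;                 // value of the operator selected next
    double old_ecr = 0.0, old_efr = 0.5;   // rule's current expected current/future reward
    double sum_old_ecr = old_ecr;          // sums over all just-fired rules (a single rule here)
    double sum_old_efr = old_efr;

    // delta_ecr moves toward the immediate reward, delta_efr toward the discounted future value
    double delta_ecr = alpha * trace * ( reward - sum_old_ecr );
    double delta_efr = alpha * trace * ( ( discount * op_value ) - sum_old_efr );

    // new_combined is the value written back as the rule's numeric-indifferent referent
    printf( "new value: %f\n", ( old_ecr + delta_ecr ) + ( old_efr + delta_efr ) );   // prints 1.190000
    return 0;
}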
void rl_remove_refs_for_prod ( agent *my_agent, production *prod )

Definition at line 290 of file reinforcement_learning.cpp.

References symbol_union::id, identifier_struct::lower_goal, rl_remove_ref(), and agent_struct::top_state.

Referenced by excise_production().

{
for ( Symbol* state = my_agent->top_state; state; state = state->id.lower_goal )
{
state->id.rl_info->eligibility_traces->erase( prod );
rl_remove_ref( state, prod );
}
}
void rl_revert_template_id ( agent *my_agent )

Definition at line 449 of file reinforcement_learning.cpp.

References agent_struct::rl_template_count.

Referenced by rl_build_template_instantiation().

{
my_agent->rl_template_count--;
}
void rl_rule_meta ( agent *my_agent, production *prod )

Definition at line 350 of file reinforcement_learning.cpp.

References production_struct::documentation, rl_param_container::get_documentation_params(), soar_module::constant_param< T >::get_value(), rl_param_container::meta, soar_module::on, agent_struct::rl_params, and param_accessor< T >::set_param().

Referenced by parse_production(), and reteload_node_and_children().

{
if ( prod->documentation && ( my_agent->rl_params->meta->get_value() == soar_module::on ) )
{
std::string doc( prod->documentation );
const std::vector<std::pair<std::string, param_accessor<double> *> > &documentation_params = my_agent->rl_params->get_documentation_params();
for (std::vector<std::pair<std::string, param_accessor<double> *> >::const_iterator doc_params_it = documentation_params.begin();
doc_params_it != documentation_params.end(); ++doc_params_it) {
const std::string &param_name = doc_params_it->first;
param_accessor<double> *accessor = doc_params_it->second;
std::stringstream param_name_ss;
param_name_ss << param_name << "=";
std::string search_term = param_name_ss.str();
size_t begin_index = doc.find(search_term);
if (begin_index == std::string::npos) continue;
begin_index += search_term.size();
size_t end_index = doc.find(";", begin_index);
if (end_index == std::string::npos) continue;
std::string param_value_str = doc.substr(begin_index, end_index);
accessor->set_param(prod, param_value_str);
}
/*
std::string search( "rlupdates=" );
if ( doc.length() > search.length() )
{
if ( doc.substr( 0, search.length() ).compare( search ) == 0 )
{
uint64_t val;
from_string( val, doc.substr( search.length() ) );
prod->rl_update_count = static_cast< double >( val );
}
}
*/
}
}
void rl_store_data ( agent *my_agent, Symbol *goal, preference *cand )

Definition at line 790 of file reinforcement_learning.cpp.

References rl_data_struct::gap_age, soar_module::constant_param< T >::get_value(), symbol_union::id, identifier_struct::name_letter, identifier_struct::name_number, preference_struct::next, NUMERIC_INDIFFERENT_PREFERENCE_TYPE, preference_struct::numeric_value, soar_module::on, identifier_struct::operator_slot, slot_struct::preferences, rl_data_struct::prev_op_rl_rules, rl_data_struct::previous_q, print(), rl_add_ref(), rl_clear_refs(), identifier_struct::rl_info, agent_struct::rl_params, agent_struct::sysparams, rl_param_container::temporal_extension, TRACE_RL_SYSPARAM, preference_struct::value, and xml_generate_warning().

Referenced by decide_context_slot().

{
rl_data *data = goal->id.rl_info;
Symbol *op = cand->value;
bool using_gaps = ( my_agent->rl_params->temporal_extension->get_value() == soar_module::on );
// Make list of just-fired prods
unsigned int just_fired = 0;
for ( preference *pref = goal->id.operator_slot->preferences[ NUMERIC_INDIFFERENT_PREFERENCE_TYPE ]; pref; pref = pref->next )
{
if ( ( op == pref->value ) && pref->inst->prod->rl_rule )
{
if ( ( just_fired == 0 ) && !data->prev_op_rl_rules->empty() )
{
rl_clear_refs( goal );
}
rl_add_ref( goal, pref->inst->prod );
just_fired++;
}
}
if ( just_fired )
{
data->previous_q = cand->numeric_value;
}
else
{
if ( my_agent->sysparams[ TRACE_RL_SYSPARAM ] && using_gaps &&
( data->gap_age == 0 ) && !data->prev_op_rl_rules->empty() )
{
char buf[256];
SNPRINTF( buf, 254, "gap started (%c%llu)", goal->id.name_letter, static_cast<long long unsigned>(goal->id.name_number) );
print( my_agent, buf );
xml_generate_warning( my_agent, buf );
}
if ( !using_gaps )
{
if ( !data->prev_op_rl_rules->empty() )
{
rl_clear_refs( goal );
}
data->previous_q = cand->numeric_value;
}
else
{
if ( !data->prev_op_rl_rules->empty() )
{
data->gap_age++;
}
}
}
}
void rl_tabulate_reward_value_for_goal ( agent *my_agent, Symbol *goal )

Definition at line 723 of file reinforcement_learning.cpp.

References agent_struct::bottom_goal, rl_param_container::discount_rate, find_slot(), FLOAT_CONSTANT_SYMBOL_TYPE, rl_data_struct::gap_age, get_number_from_symbol(), soar_module::primitive_param< T >::get_value(), soar_module::constant_param< T >::get_value(), soar_module::primitive_stat< T >::get_value(), rl_stat_container::global_reward, rl_data_struct::hrl_age, rl_param_container::hrl_discount, symbol_union::id, IDENTIFIER_SYMBOL_TYPE, INT_CONSTANT_SYMBOL_TYPE, wme_struct::next, soar_module::on, rl_data_struct::prev_op_rl_rules, rl_data_struct::reward, identifier_struct::reward_header, identifier_struct::rl_info, agent_struct::rl_params, agent_struct::rl_stats, agent_struct::rl_sym_reward, agent_struct::rl_sym_value, soar_module::primitive_stat< T >::set_value(), rl_param_container::temporal_discount, rl_stat_container::total_reward, wme_struct::value, and slot_struct::wmes.

Referenced by do_one_top_level_phase(), remove_existing_context_and_descendents(), and rl_tabulate_reward_values().

{
rl_data *data = goal->id.rl_info;
if ( !data->prev_op_rl_rules->empty() )
{
slot *s = find_slot( goal->id.reward_header, my_agent->rl_sym_reward );
slot *t;
wme *w, *x;
double reward = 0.0;
double discount_rate = my_agent->rl_params->discount_rate->get_value();
if ( s )
{
for ( w=s->wmes; w; w=w->next )
{
if ( w->value->common.symbol_type == IDENTIFIER_SYMBOL_TYPE )
{
t = find_slot( w->value, my_agent->rl_sym_value );
if ( t )
{
for ( x=t->wmes; x; x=x->next )
{
if ( ( x->value->common.symbol_type == FLOAT_CONSTANT_SYMBOL_TYPE ) || ( x->value->common.symbol_type == INT_CONSTANT_SYMBOL_TYPE ) )
{
reward += get_number_from_symbol( x->value );
}
}
}
}
}
// if temporal_discount is off, don't discount for gaps
unsigned int effective_age = data->hrl_age;
if ( my_agent->rl_params->temporal_discount->get_value() == soar_module::on )
{
effective_age += data->gap_age;
}
data->reward += ( reward * pow( discount_rate, static_cast< double >( effective_age ) ) );
}
// update stats
double global_reward = my_agent->rl_stats->global_reward->get_value();
my_agent->rl_stats->total_reward->set_value( reward );
my_agent->rl_stats->global_reward->set_value( global_reward + reward );
if ( ( goal != my_agent->bottom_goal ) && ( my_agent->rl_params->hrl_discount->get_value() == soar_module::on ) )
{
data->hrl_age++;
}
}
}
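The reward WMEs summed above follow the standard reward-link layout; a brief sketch of the working-memory structure being read (attribute names assumed from the Soar manual rather than from this file):

// Assumed working-memory layout read by rl_tabulate_reward_value_for_goal:
//   (<s>  ^reward-link <rl>)    // goal->id.reward_header
//   (<rl> ^reward <r1>)         // slot located via my_agent->rl_sym_reward
//   (<r1> ^value 1.5)           // slot located via my_agent->rl_sym_value
//   (<rl> ^reward <r2>)
//   (<r2> ^value -0.2)
// With these WMEs the nested loops accumulate reward = 1.5 + (-0.2) = 1.3,
// which is then scaled by discount_rate^effective_age and added to data->reward.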
void rl_tabulate_reward_values ( agent *my_agent )
void rl_update_template_tracking ( agent *my_agent, const char *rule_name )

Definition at line 434 of file reinforcement_learning.cpp.

References rl_get_template_id(), and agent_struct::rl_template_count.

Referenced by make_production().

{
int new_id = rl_get_template_id( rule_name );
if ( ( new_id != -1 ) && ( new_id > my_agent->rl_template_count ) )
my_agent->rl_template_count = ( new_id + 1 );
}
bool rl_valid_rule ( production *prod )

Definition at line 331 of file reinforcement_learning.cpp.

References production_struct::action_list, MAKE_ACTION, action_struct::next, and NUMERIC_INDIFFERENT_PREFERENCE_TYPE.

Referenced by make_production(), and reteload_node_and_children().

{
bool numeric_pref = false;
int num_actions = 0;
for ( action *a = prod->action_list; a; a = a->next )
{
num_actions++;
if ( a->type == MAKE_ACTION )
{
if ( a->preference_type == NUMERIC_INDIFFERENT_PREFERENCE_TYPE )
numeric_pref = true;
}
}
return ( numeric_pref && ( num_actions == 1 ) );
}
bool rl_valid_template ( production *prod )

Definition at line 304 of file reinforcement_learning.cpp.

References production_struct::action_list, BINARY_INDIFFERENT_PREFERENCE_TYPE, identifier_struct::common_symbol_info, symbol_union::id, MAKE_ACTION, action_struct::next, NUMERIC_INDIFFERENT_PREFERENCE_TYPE, rhs_value_is_symbol(), rhs_value_to_symbol(), symbol_common_data_struct::symbol_type, and VARIABLE_SYMBOL_TYPE.

Referenced by parse_production().

{
bool numeric_pref = false;
bool var_pref = false;
int num_actions = 0;
for ( action *a = prod->action_list; a; a = a->next )
{
num_actions++;
if ( a->type == MAKE_ACTION )
{
if ( a->preference_type == NUMERIC_INDIFFERENT_PREFERENCE_TYPE )
{
numeric_pref = true;
}
else if ( a->preference_type == BINARY_INDIFFERENT_PREFERENCE_TYPE )
{
if ( rhs_value_is_symbol( a->referent ) && ( rhs_value_to_symbol( a->referent )->common.symbol_type == VARIABLE_SYMBOL_TYPE ) )
var_pref = true;
}
}
}
return ( ( num_actions == 1 ) && ( numeric_pref || var_pref ) );
}
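For context, a template accepted by this check has a single RHS action making either a numeric indifferent preference or a binary indifferent preference whose referent is a variable. A sketch of the latter form, as a hypothetical rule using the :template flag described in the Soar manual:

// Hypothetical Soar template rule (shown as a comment; syntax per the Soar manual):
//   sp {my*template
//      :template
//      (state <s> ^operator <o> + ^value <v>)
//   -->
//      (<s> ^operator <o> = <v>)}
// The single action asserts a binary indifferent preference whose referent <v> is a
// variable, so rl_valid_template returns true; rl_build_template_instantiation later
// grounds <v> to generate concrete rl*my*template*N rules.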
void rl_watkins_clear ( agent *my_agent, Symbol *goal )