Soar Kernel  9.3.2 08-06-12
reinforcement_learning.h File Reference
#include <map>
#include <string>
#include <list>
#include <vector>
#include "soar_module.h"
#include "chunk.h"
#include "production.h"

Go to the source code of this file.

Data Structures

class  param_accessor< T >
class  rl_apoptosis_param
class  rl_apoptosis_predicate< T >
class  rl_apoptosis_thresh_param
struct  rl_data_struct
class  rl_dbd_h_accessor
class  rl_learning_param
class  rl_param_container
class  rl_stat_container
class  rl_updates_accessor

Macros

#define OP_NO_CHANGE_IMPASSE_TYPE   -2
#define STATE_NO_CHANGE_IMPASSE_TYPE   -1

Typedefs

typedef struct rl_data_struct rl_data
typedef std::map< production*, double, std::less< production* >, soar_module::soar_memory_pool_allocator< std::pair< production*, double > > > rl_et_map
typedef soar_module::bla_object_memory< production, 10, 50 > rl_production_memory
typedef std::list< production*, soar_module::soar_memory_pool_allocator< production* > > rl_rule_list
typedef std::map< Symbol*, Symbol* > rl_symbol_map
typedef std::set< rl_symbol_map > rl_symbol_map_set

Functions

void rl_add_goal_or_impasse_tests_to_conds (agent *my_agent, condition *all_conds)
Symbol * rl_build_template_instantiation (agent *my_agent, instantiation *my_template_instance, struct token_struct *tok, wme *w)
void rl_clear_refs (Symbol *goal)
bool rl_enabled (agent *my_agent)
int rl_get_template_id (const char *prod_name)
void rl_initialize_template_tracking (agent *my_agent)
action * rl_make_simple_action (agent *my_agent, Symbol *id_sym, Symbol *attr_sym, Symbol *val_sym, Symbol *ref_sym)
int rl_next_template_id (agent *my_agent)
void rl_perform_update (agent *my_agent, double op_value, bool op_rl, Symbol *goal, bool update_efr=true)
void rl_remove_refs_for_prod (agent *my_agent, production *prod)
void rl_revert_template_id (agent *my_agent)
void rl_rule_meta (agent *my_agent, production *prod)
void rl_store_data (agent *my_agent, Symbol *goal, preference *cand)
void rl_tabulate_reward_value_for_goal (agent *my_agent, Symbol *goal)
void rl_tabulate_reward_values (agent *my_agent)
void rl_update_template_tracking (agent *my_agent, const char *rule_name)
bool rl_valid_rule (production *prod)
bool rl_valid_template (production *prod)
void rl_watkins_clear (agent *my_agent, Symbol *goal)

Macro Definition Documentation

#define OP_NO_CHANGE_IMPASSE_TYPE   -2

Definition at line 33 of file reinforcement_learning.h.

#define STATE_NO_CHANGE_IMPASSE_TYPE   -1

Definition at line 32 of file reinforcement_learning.h.

Typedef Documentation

typedef struct rl_data_struct rl_data

typedef std::map< production*, double, std::less< production* >, soar_module::soar_memory_pool_allocator< std::pair< production*, double > > > rl_et_map

Definition at line 170 of file reinforcement_learning.h.

typedef soar_module::bla_object_memory< production, 10, 50 > rl_production_memory

Definition at line 198 of file reinforcement_learning.h.

typedef std::list< production*, soar_module::soar_memory_pool_allocator< production* > > rl_rule_list

Definition at line 177 of file reinforcement_learning.h.

typedef std::map< Symbol*, Symbol* > rl_symbol_map

Definition at line 194 of file reinforcement_learning.h.

typedef std::set< rl_symbol_map > rl_symbol_map_set

Definition at line 195 of file reinforcement_learning.h.

Function Documentation

void rl_add_goal_or_impasse_tests_to_conds ( agent *my_agent, condition *all_conds )

Definition at line 692 of file reinforcement_learning.cpp.

References add_new_test_to_test(), agent_struct::complex_test_pool, get_new_tc_number(), GOAL_ID_TEST, wme_struct::id, symbol_union::id, IMPASSE_ID_TEST, identifier_struct::isa_goal, identifier_struct::isa_impasse, make_test_from_complex_test(), NIL, POSITIVE_CONDITION, referent_of_equality_test(), identifier_struct::tc_num, and complex_test_struct::type.

Referenced by rl_build_template_instantiation().

{
// mark each id as we add a test for it, so we don't add a test for the same id in two different places
Symbol *id;
test t;
complex_test *ct;
tc_number tc = get_new_tc_number( my_agent );
for ( condition *cond = all_conds; cond != NIL; cond = cond->next )
{
if ( cond->type != POSITIVE_CONDITION )
continue;
id = referent_of_equality_test( cond->data.tests.id_test );
if ( ( id->id.isa_goal || id->id.isa_impasse ) && ( id->id.tc_num != tc ) )
{
allocate_with_pool( my_agent, &my_agent->complex_test_pool, &ct );
ct->type = static_cast<byte>( ( id->id.isa_goal )?( GOAL_ID_TEST ):( IMPASSE_ID_TEST ) );
t = make_test_from_complex_test( ct );
add_new_test_to_test( my_agent, &( cond->data.tests.id_test ), t );
id->id.tc_num = tc;
}
}
}
Symbol * rl_build_template_instantiation ( agent *my_agent, instantiation *my_template_instance, struct token_struct *tok, wme *w )

Definition at line 540 of file reinforcement_learning.cpp.

References production_struct::action_list, add_production_to_rete(), wme_struct::attr, action_struct::attr, copy_condition_list(), deallocate_condition_list(), DUPLICATE_PRODUCTION, excise_production(), FALSE, symbol_union::fc, find_sym_constant(), first_letter_from_symbol(), FLOAT_CONSTANT_SYMBOL_TYPE, get_new_tc_number(), symbol_union::ic, wme_struct::id, action_struct::id, symbol_union::id, instantiate_rhs_value(), INT_CONSTANT_SYMBOL_TYPE, identifier_struct::level, make_production(), make_sym_constant(), production_struct::name, sym_constant_struct::name, NIL, instantiation_struct::nots, NUMERIC_INDIFFERENT_PREFERENCE_TYPE, production_struct::p_node, p_node_to_conditions_and_nots(), action_struct::preference_type, instantiation_struct::prod, action_struct::referent, reset_variable_generator(), rl_add_goal_or_impasse_tests_to_conds(), production_struct::rl_ecr, production_struct::rl_efr, rl_get_template_constants(), rl_make_simple_action(), rl_next_template_id(), rl_revert_template_id(), production_struct::rl_template_conds, production_struct::rl_template_instantiations, symbol_union::sc, symbol_remove_ref(), instantiation_struct::top_of_instantiated_conditions, TRUE, USER_PRODUCTION_TYPE, wme_struct::value, int_constant_struct::value, float_constant_struct::value, action_struct::value, agent_struct::variablization_tc, variablize_condition_list(), and variablize_nots_and_insert_into_conditions().

Referenced by create_instantiation().

{
Symbol* return_val = NULL;
// initialize production conditions
if ( my_template_instance->prod->rl_template_conds == NIL )
{
not_struct* nots;
condition* c_top;
condition* c_bottom;
p_node_to_conditions_and_nots( my_agent, my_template_instance->prod->p_node, NIL, NIL, &( c_top ), &( c_bottom ), &( nots ), NIL );
my_template_instance->prod->rl_template_conds = c_top;
}
// initialize production instantiation set
if ( my_template_instance->prod->rl_template_instantiations == NIL )
{
my_template_instance->prod->rl_template_instantiations = new rl_symbol_map_set;
}
// get constants
rl_symbol_map constant_map;
{
rl_get_template_constants( my_template_instance->prod->rl_template_conds, my_template_instance->top_of_instantiated_conditions, &( constant_map ) );
}
// try to insert into instantiation set
//if ( !constant_map.empty() )
{
std::pair< rl_symbol_map_set::iterator, bool > ins_result = my_template_instance->prod->rl_template_instantiations->insert( constant_map );
if ( ins_result.second )
{
Symbol *id, *attr, *value, *referent;
production *my_template = my_template_instance->prod;
action *my_action = my_template->action_list;
char first_letter;
double init_value = 0;
condition *cond_top, *cond_bottom;
// make unique production name
Symbol *new_name_symbol;
std::string new_name = "";
std::string empty_string = "";
std::string temp_id;
int new_id;
do
{
new_id = rl_next_template_id( my_agent );
to_string( new_id, temp_id );
new_name = ( "rl*" + empty_string + my_template->name->sc.name + "*" + temp_id );
} while ( find_sym_constant( my_agent, new_name.c_str() ) != NIL );
new_name_symbol = make_sym_constant( my_agent, new_name.c_str() );
// prep conditions
copy_condition_list( my_agent, my_template_instance->top_of_instantiated_conditions, &cond_top, &cond_bottom );
reset_variable_generator( my_agent, cond_top, NIL );
my_agent->variablization_tc = get_new_tc_number( my_agent );
variablize_condition_list( my_agent, cond_top );
variablize_nots_and_insert_into_conditions( my_agent, my_template_instance->nots, cond_top );
// get the preference value
id = instantiate_rhs_value( my_agent, my_action->id, -1, 's', tok, w );
attr = instantiate_rhs_value( my_agent, my_action->attr, id->id.level, 'a', tok, w );
first_letter = first_letter_from_symbol( attr );
value = instantiate_rhs_value( my_agent, my_action->value, id->id.level, first_letter, tok, w );
referent = instantiate_rhs_value( my_agent, my_action->referent, id->id.level, first_letter, tok, w );
// clean up after yourself :)
symbol_remove_ref( my_agent, id );
symbol_remove_ref( my_agent, attr );
symbol_remove_ref( my_agent, value );
symbol_remove_ref( my_agent, referent );
// make new action list
action *new_action = rl_make_simple_action( my_agent, id, attr, value, referent );
new_action->preference_type = NUMERIC_INDIFFERENT_PREFERENCE_TYPE;
// make new production
production *new_production = make_production( my_agent, USER_PRODUCTION_TYPE, new_name_symbol, &cond_top, &cond_bottom, &new_action, false );
// set initial expected reward values
{
if ( referent->common.symbol_type == INT_CONSTANT_SYMBOL_TYPE )
{
init_value = static_cast< double >( referent->ic.value );
}
else if ( referent->common.symbol_type == FLOAT_CONSTANT_SYMBOL_TYPE )
{
init_value = referent->fc.value;
}
new_production->rl_ecr = 0.0;
new_production->rl_efr = init_value;
}
// attempt to add to rete, remove if duplicate
if ( add_production_to_rete( my_agent, new_production, cond_top, NULL, FALSE, TRUE ) == DUPLICATE_PRODUCTION )
{
excise_production( my_agent, new_production, false );
rl_revert_template_id( my_agent );
new_name_symbol = NULL;
}
deallocate_condition_list( my_agent, cond_top );
return_val = new_name_symbol;
}
}
return return_val;
}
void rl_clear_refs ( Symbol *goal )

Definition at line 253 of file reinforcement_learning.cpp.

References symbol_union::id, rl_data_struct::prev_op_rl_rules, and identifier_struct::rl_info.

Referenced by remove_existing_context_and_descendents(), rl_reset_data(), and rl_store_data().

{
rl_rule_list *rules = goal->id.rl_info->prev_op_rl_rules;
for ( rl_rule_list::iterator p=rules->begin(); p!=rules->end(); p++ )
{
(*p)->rl_ref_count--;
}
rules->clear();
}
bool rl_enabled ( agent *my_agent )
int rl_get_template_id ( const char *  prod_name)

Definition at line 395 of file reinforcement_learning.cpp.

References wme_struct::id, and is_whole_number().

Referenced by rl_update_template_tracking().

{
std::string temp = prod_name;
// has to be at least "rl*a*#" (where a is a single letter/number/etc)
if ( temp.length() < 6 )
return -1;
// check first three letters are "rl*"
if ( temp.compare( 0, 3, "rl*" ) )
return -1;
// find last * to isolate id
std::string::size_type last_star = temp.find_last_of( '*' );
if ( last_star == std::string::npos )
return -1;
// make sure there's something left after last_star
if ( last_star == ( temp.length() - 1 ) )
return -1;
// make sure id is a valid natural number
std::string id_str = temp.substr( last_star + 1 );
if ( !is_whole_number( id_str ) )
return -1;
// convert id
int id;
from_string( id, id_str );
return id;
}
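For reference, a few hypothetical rule names and the IDs the parsing above would return (illustrative only; these names are not taken from the kernel):

// Illustrative behaviour of rl_get_template_id (hypothetical rule names):
//   rl_get_template_id( "rl*my-template*7" )   returns 7
//   rl_get_template_id( "my-template*7" )      returns -1  (missing the "rl*" prefix)
//   rl_get_template_id( "rl*my-template*" )    returns -1  (nothing after the last '*')
//   rl_get_template_id( "rl*my-template*3a" )  returns -1  (id is not a whole number)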
void rl_initialize_template_tracking ( agent *my_agent )

Definition at line 428 of file reinforcement_learning.cpp.

References agent_struct::rl_template_count.

Referenced by create_soar_agent().

{
my_agent->rl_template_count = 1;
}
action * rl_make_simple_action ( agent *my_agent, Symbol *id_sym, Symbol *attr_sym, Symbol *val_sym, Symbol *ref_sym )

Definition at line 656 of file reinforcement_learning.cpp.

References agent_struct::action_pool, action_struct::attr, action_struct::id, MAKE_ACTION, action_struct::next, NIL, action_struct::referent, symbol_add_ref(), symbol_to_rhs_value(), action_struct::type, action_struct::value, and variablize_symbol().

Referenced by rl_build_template_instantiation().

{
action *rhs;
Symbol *temp;
allocate_with_pool( my_agent, &my_agent->action_pool, &rhs );
rhs->next = NIL;
rhs->type = MAKE_ACTION;
// id
temp = id_sym;
symbol_add_ref( temp );
variablize_symbol( my_agent, &temp );
rhs->id = symbol_to_rhs_value( temp );
// attribute
temp = attr_sym;
symbol_add_ref( temp );
variablize_symbol( my_agent, &temp );
rhs->attr = symbol_to_rhs_value( temp );
// value
temp = val_sym;
symbol_add_ref( temp );
variablize_symbol( my_agent, &temp );
rhs->value = symbol_to_rhs_value( temp );
// referent
temp = ref_sym;
symbol_add_ref( temp );
variablize_symbol( my_agent, &temp );
rhs->referent = symbol_to_rhs_value( temp );
return rhs;
}
int rl_next_template_id ( agent *my_agent )

Definition at line 443 of file reinforcement_learning.cpp.

References agent_struct::rl_template_count.

Referenced by rl_build_template_instantiation().

{
return (my_agent->rl_template_count++);
}
void rl_perform_update ( agent *my_agent, double op_value, bool op_rl, Symbol *goal, bool update_efr = true )

Definition at line 849 of file reinforcement_learning.cpp.

References production_struct::action_list, rl_param_container::decay_mode, rl_param_container::delta_bar_delta_decay, rl_param_container::discount_rate, production_struct::documentation, rl_data_struct::eligibility_traces, rl_param_container::et_decay_rate, rl_param_container::et_tolerance, rl_param_container::exponential_decay, free_memory_block_for_string(), rl_data_struct::gap_age, rl_param_container::get_documentation_params(), soar_module::primitive_param< T >::get_value(), soar_module::string_param::get_value(), soar_module::constant_param< T >::get_value(), rl_data_struct::hrl_age, symbol_union::id, preference_struct::inst_next, production_struct::instantiations, rl_param_container::learning_rate, rl_param_container::logarithmic_decay, make_float_constant(), make_memory_block_for_string(), rl_param_container::meta, rl_param_container::meta_learning_rate, production_struct::name, sym_constant_struct::name, identifier_struct::name_letter, identifier_struct::name_number, instantiation_struct::next, rl_param_container::normal_decay, soar_module::on, rl_data_struct::prev_op_rl_rules, print(), action_struct::referent, rl_data_struct::reward, rhs_value_to_symbol(), production_struct::rl_delta_bar_delta_beta, production_struct::rl_delta_bar_delta_h, production_struct::rl_ecr, production_struct::rl_efr, identifier_struct::rl_info, agent_struct::rl_params, production_struct::rl_update_count, symbol_union::sc, symbol_remove_ref(), symbol_to_rhs_value(), agent_struct::sysparams, rl_param_container::temporal_discount, rl_param_container::temporal_extension, TRACE_RL_SYSPARAM, rl_param_container::update_log_path, xml_generate_message(), and xml_generate_warning().

Referenced by do_one_top_level_phase(), exploration_choose_according_to_policy(), remove_existing_context_and_descendents(), require_preference_semantics(), and run_preference_semantics().

{
bool using_gaps = ( my_agent->rl_params->temporal_extension->get_value() == soar_module::on );
if ( !using_gaps || op_rl )
{
rl_data *data = goal->id.rl_info;
if ( !data->prev_op_rl_rules->empty() )
{
rl_et_map::iterator iter;
double alpha = my_agent->rl_params->learning_rate->get_value();
double lambda = my_agent->rl_params->et_decay_rate->get_value();
double gamma = my_agent->rl_params->discount_rate->get_value();
double tolerance = my_agent->rl_params->et_tolerance->get_value();
double theta = my_agent->rl_params->meta_learning_rate->get_value();
// if temporal_discount is off, don't discount for gaps
unsigned int effective_age = data->hrl_age + 1;
if ( my_agent->rl_params->temporal_discount->get_value() == soar_module::on )
{
effective_age += data->gap_age;
}
double discount = pow( gamma, static_cast< double >( effective_age ) );
// notify of gap closure
if ( data->gap_age && using_gaps && my_agent->sysparams[ TRACE_RL_SYSPARAM ] )
{
char buf[256];
SNPRINTF( buf, 254, "gap ended (%c%llu)", goal->id.name_letter, static_cast<long long unsigned>(goal->id.name_number) );
print( my_agent, buf );
xml_generate_warning( my_agent, buf );
}
// Iterate through eligibility_traces, decay traces. If less than TOLERANCE, remove from map.
if ( lambda == 0 )
{
if ( !data->eligibility_traces->empty() )
{
data->eligibility_traces->clear();
}
}
else
{
for ( iter = data->eligibility_traces->begin(); iter != data->eligibility_traces->end(); )
{
iter->second *= lambda;
iter->second *= discount;
if ( iter->second < tolerance )
{
data->eligibility_traces->erase( iter++ );
}
else
{
++iter;
}
}
}
// Update trace for just fired prods
double sum_old_ecr = 0.0;
double sum_old_efr = 0.0;
if ( !data->prev_op_rl_rules->empty() )
{
double trace_increment = ( 1.0 / static_cast<double>( data->prev_op_rl_rules->size() ) );
rl_rule_list::iterator p;
for ( p=data->prev_op_rl_rules->begin(); p!=data->prev_op_rl_rules->end(); p++ )
{
sum_old_ecr += (*p)->rl_ecr;
sum_old_efr += (*p)->rl_efr;
iter = data->eligibility_traces->find( (*p) );
if ( iter != data->eligibility_traces->end() )
{
iter->second += trace_increment;
}
else
{
(*data->eligibility_traces)[ (*p) ] = trace_increment;
}
}
}
// For each prod with a trace, perform update
{
double old_ecr, old_efr;
double delta_ecr, delta_efr;
double new_combined, new_ecr, new_efr;
double delta_t = (data->reward + discount * op_value) - (sum_old_ecr + sum_old_efr);
for ( iter = data->eligibility_traces->begin(); iter != data->eligibility_traces->end(); iter++ )
{
production *prod = iter->first;
// get old vals
old_ecr = prod->rl_ecr;
old_efr = prod->rl_efr;
// Adjust alpha based on decay policy
// Miller 11/14/2011
double adjusted_alpha;
switch (my_agent->rl_params->decay_mode->get_value())
{
case rl_param_container::exponential_decay:
adjusted_alpha = 1.0 / (prod->rl_update_count + 1.0);
break;
case rl_param_container::logarithmic_decay:
adjusted_alpha = 1.0 / (log(prod->rl_update_count + 1.0) + 1.0);
break;
case rl_param_container::delta_bar_delta_decay:
{
// Note that in this case, x_i = 1.0 for all productions that are being updated.
// Those values have been included here for consistency with the algorithm as described in the delta bar delta paper.
prod->rl_delta_bar_delta_beta = prod->rl_delta_bar_delta_beta + theta * delta_t * 1.0 * prod->rl_delta_bar_delta_h;
adjusted_alpha = exp(prod->rl_delta_bar_delta_beta);
double decay_term = 1.0 - adjusted_alpha * 1.0 * 1.0;
if (decay_term < 0.0) decay_term = 0.0;
prod->rl_delta_bar_delta_h = prod->rl_delta_bar_delta_h * decay_term + adjusted_alpha * delta_t * 1.0;
break;
}
case rl_param_container::normal_decay:
default:
adjusted_alpha = alpha;
break;
}
// calculate updates
delta_ecr = ( adjusted_alpha * iter->second * ( data->reward - sum_old_ecr ) );
if ( update_efr )
{
delta_efr = ( adjusted_alpha * iter->second * ( ( discount * op_value ) - sum_old_efr ) );
}
else
{
delta_efr = 0.0;
}
// calculate new vals
new_ecr = ( old_ecr + delta_ecr );
new_efr = ( old_efr + delta_efr );
new_combined = ( new_ecr + new_efr );
// print as necessary
if ( my_agent->sysparams[ TRACE_RL_SYSPARAM ] )
{
std::ostringstream ss;
ss << "RL update " << prod->name->sc.name << " "
<< old_ecr << " " << old_efr << " " << old_ecr + old_efr << " -> "
<< new_ecr << " " << new_efr << " " << new_combined ;
std::string temp_str( ss.str() );
print( my_agent, "%s\n", temp_str.c_str() );
xml_generate_message( my_agent, temp_str.c_str() );
// Log update to file if the log file has been set
std::string log_path = my_agent->rl_params->update_log_path->get_value();
if (!log_path.empty()) {
std::ofstream file(log_path.c_str(), std::ios_base::app);
file << ss.str() << std::endl;
file.close();
}
}
// Change value of rule
prod->action_list->referent = symbol_to_rhs_value( make_float_constant( my_agent, new_combined ) );
prod->rl_update_count += 1;
prod->rl_ecr = new_ecr;
prod->rl_efr = new_efr;
// change documentation
if ( my_agent->rl_params->meta->get_value() == soar_module::on )
{
if ( prod->documentation )
{
free_memory_block_for_string( my_agent, prod->documentation );
}
std::stringstream doc_ss;
const std::vector<std::pair<std::string, param_accessor<double> *> > &documentation_params = my_agent->rl_params->get_documentation_params();
for (std::vector<std::pair<std::string, param_accessor<double> *> >::const_iterator doc_params_it = documentation_params.begin();
doc_params_it != documentation_params.end(); ++doc_params_it) {
doc_ss << doc_params_it->first << "=" << doc_params_it->second->get_param(prod) << ";";
}
prod->documentation = make_memory_block_for_string(my_agent, doc_ss.str().c_str());
/*
std::string rlupdates( "rlupdates=" );
std::string val;
to_string( static_cast< uint64_t >( prod->rl_update_count ), val );
rlupdates.append( val );
prod->documentation = make_memory_block_for_string( my_agent, rlupdates.c_str() );
*/
}
// Change value of preferences generated by current instantiations of this rule
if ( prod->instantiations )
{
for ( instantiation *inst = prod->instantiations; inst; inst = inst->next )
{
for ( preference *pref = inst->preferences_generated; pref; pref = pref->inst_next )
{
symbol_remove_ref( my_agent, pref->referent );
pref->referent = make_float_constant( my_agent, new_combined );
}
}
}
}
}
}
data->gap_age = 0;
data->hrl_age = 0;
data->reward = 0.0;
}
}
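As a minimal standalone sketch (not kernel code; all numeric values are hypothetical), the per-production arithmetic performed in the update loop above can be isolated as follows:

// Standalone sketch of the core TD update above (hypothetical values).
#include <cstdio>

int main()
{
    double alpha = 0.3;                    // learning-rate parameter
    double discount = 0.9;                 // gamma raised to the effective age
    double trace = 1.0;                    // eligibility trace of one just-fired rule
    double reward = 1.0;                   // data->reward
    double op_value = 2.0;                 // value of the operator selected next
    double old_ecr = 0.0, old_efr = 0.5;   // rule's current expected current/future reward
    double sum_old_ecr = old_ecr;          // sums over all just-fired rules (a single rule here)
    double sum_old_efr = old_efr;

    // delta_ecr moves toward the immediate reward, delta_efr toward the discounted future value
    double delta_ecr = alpha * trace * ( reward - sum_old_ecr );
    double delta_efr = alpha * trace * ( ( discount * op_value ) - sum_old_efr );

    // new_combined is the value written back as the rule's numeric-indifferent referent
    printf( "new value: %f\n", ( old_ecr + delta_ecr ) + ( old_efr + delta_efr ) );   // prints 1.190000
    return 0;
}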
void rl_remove_refs_for_prod ( agent *my_agent, production *prod )

Definition at line 290 of file reinforcement_learning.cpp.

References symbol_union::id, identifier_struct::lower_goal, rl_remove_ref(), and agent_struct::top_state.

Referenced by excise_production().

{
for ( Symbol* state = my_agent->top_state; state; state = state->id.lower_goal )
{
state->id.rl_info->eligibility_traces->erase( prod );
rl_remove_ref( state, prod );
}
}
void rl_revert_template_id ( agent *my_agent )

Definition at line 449 of file reinforcement_learning.cpp.

References agent_struct::rl_template_count.

Referenced by rl_build_template_instantiation().

{
my_agent->rl_template_count--;
}
void rl_rule_meta ( agent *my_agent, production *prod )

Definition at line 350 of file reinforcement_learning.cpp.

References production_struct::documentation, rl_param_container::get_documentation_params(), soar_module::constant_param< T >::get_value(), rl_param_container::meta, soar_module::on, agent_struct::rl_params, and param_accessor< T >::set_param().

Referenced by parse_production(), and reteload_node_and_children().

{
if ( prod->documentation && ( my_agent->rl_params->meta->get_value() == soar_module::on ) )
{
std::string doc( prod->documentation );
const std::vector<std::pair<std::string, param_accessor<double> *> > &documentation_params = my_agent->rl_params->get_documentation_params();
for (std::vector<std::pair<std::string, param_accessor<double> *> >::const_iterator doc_params_it = documentation_params.begin();
doc_params_it != documentation_params.end(); ++doc_params_it) {
const std::string &param_name = doc_params_it->first;
param_accessor<double> *accessor = doc_params_it->second;
std::stringstream param_name_ss;
param_name_ss << param_name << "=";
std::string search_term = param_name_ss.str();
size_t begin_index = doc.find(search_term);
if (begin_index == std::string::npos) continue;
begin_index += search_term.size();
size_t end_index = doc.find(";", begin_index);
if (end_index == std::string::npos) continue;
std::string param_value_str = doc.substr(begin_index, end_index);
accessor->set_param(prod, param_value_str);
}
/*
std::string search( "rlupdates=" );
if ( doc.length() > search.length() )
{
if ( doc.substr( 0, search.length() ).compare( search ) == 0 )
{
uint64_t val;
from_string( val, doc.substr( search.length() ) );
prod->rl_update_count = static_cast< double >( val );
}
}
*/
}
}
void rl_store_data ( agent *my_agent, Symbol *goal, preference *cand )

Definition at line 790 of file reinforcement_learning.cpp.

References rl_data_struct::gap_age, soar_module::constant_param< T >::get_value(), symbol_union::id, identifier_struct::name_letter, identifier_struct::name_number, preference_struct::next, NUMERIC_INDIFFERENT_PREFERENCE_TYPE, preference_struct::numeric_value, soar_module::on, identifier_struct::operator_slot, slot_struct::preferences, rl_data_struct::prev_op_rl_rules, rl_data_struct::previous_q, print(), rl_add_ref(), rl_clear_refs(), identifier_struct::rl_info, agent_struct::rl_params, agent_struct::sysparams, rl_param_container::temporal_extension, TRACE_RL_SYSPARAM, preference_struct::value, and xml_generate_warning().

Referenced by decide_context_slot().

{
rl_data *data = goal->id.rl_info;
Symbol *op = cand->value;
bool using_gaps = ( my_agent->rl_params->temporal_extension->get_value() == soar_module::on );
// Make list of just-fired prods
unsigned int just_fired = 0;
for ( preference *pref = goal->id.operator_slot->preferences[ NUMERIC_INDIFFERENT_PREFERENCE_TYPE ]; pref; pref = pref->next )
{
if ( ( op == pref->value ) && pref->inst->prod->rl_rule )
{
if ( ( just_fired == 0 ) && !data->prev_op_rl_rules->empty() )
{
rl_clear_refs( goal );
}
rl_add_ref( goal, pref->inst->prod );
just_fired++;
}
}
if ( just_fired )
{
data->previous_q = cand->numeric_value;
}
else
{
if ( my_agent->sysparams[ TRACE_RL_SYSPARAM ] && using_gaps &&
( data->gap_age == 0 ) && !data->prev_op_rl_rules->empty() )
{
char buf[256];
SNPRINTF( buf, 254, "gap started (%c%llu)", goal->id.name_letter, static_cast<long long unsigned>(goal->id.name_number) );
print( my_agent, buf );
xml_generate_warning( my_agent, buf );
}
if ( !using_gaps )
{
if ( !data->prev_op_rl_rules->empty() )
{
rl_clear_refs( goal );
}
data->previous_q = cand->numeric_value;
}
else
{
if ( !data->prev_op_rl_rules->empty() )
{
data->gap_age++;
}
}
}
}
void rl_tabulate_reward_value_for_goal ( agent *my_agent, Symbol *goal )

Definition at line 723 of file reinforcement_learning.cpp.

References agent_struct::bottom_goal, rl_param_container::discount_rate, find_slot(), FLOAT_CONSTANT_SYMBOL_TYPE, rl_data_struct::gap_age, get_number_from_symbol(), soar_module::primitive_param< T >::get_value(), soar_module::constant_param< T >::get_value(), soar_module::primitive_stat< T >::get_value(), rl_stat_container::global_reward, rl_data_struct::hrl_age, rl_param_container::hrl_discount, symbol_union::id, IDENTIFIER_SYMBOL_TYPE, INT_CONSTANT_SYMBOL_TYPE, wme_struct::next, soar_module::on, rl_data_struct::prev_op_rl_rules, rl_data_struct::reward, identifier_struct::reward_header, identifier_struct::rl_info, agent_struct::rl_params, agent_struct::rl_stats, agent_struct::rl_sym_reward, agent_struct::rl_sym_value, soar_module::primitive_stat< T >::set_value(), rl_param_container::temporal_discount, rl_stat_container::total_reward, wme_struct::value, and slot_struct::wmes.

Referenced by do_one_top_level_phase(), remove_existing_context_and_descendents(), and rl_tabulate_reward_values().

{
rl_data *data = goal->id.rl_info;
if ( !data->prev_op_rl_rules->empty() )
{
slot *s = find_slot( goal->id.reward_header, my_agent->rl_sym_reward );
slot *t;
wme *w, *x;
double reward = 0.0;
double discount_rate = my_agent->rl_params->discount_rate->get_value();
if ( s )
{
for ( w=s->wmes; w; w=w->next )
{
if ( w->value->common.symbol_type == IDENTIFIER_SYMBOL_TYPE )
{
t = find_slot( w->value, my_agent->rl_sym_value );
if ( t )
{
for ( x=t->wmes; x; x=x->next )
{
if ( ( x->value->common.symbol_type == FLOAT_CONSTANT_SYMBOL_TYPE ) || ( x->value->common.symbol_type == INT_CONSTANT_SYMBOL_TYPE ) )
{
reward += get_number_from_symbol( x->value );
}
}
}
}
}
// if temporal_discount is off, don't discount for gaps
unsigned int effective_age = data->hrl_age;
if ( my_agent->rl_params->temporal_discount->get_value() == soar_module::on )
{
effective_age += data->gap_age;
}
data->reward += ( reward * pow( discount_rate, static_cast< double >( effective_age ) ) );
}
// update stats
double global_reward = my_agent->rl_stats->global_reward->get_value();
my_agent->rl_stats->total_reward->set_value( reward );
my_agent->rl_stats->global_reward->set_value( global_reward + reward );
if ( ( goal != my_agent->bottom_goal ) && ( my_agent->rl_params->hrl_discount->get_value() == soar_module::on ) )
{
data->hrl_age++;
}
}
}
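The reward WMEs summed above follow the standard reward-link layout; a brief sketch of the working-memory structure being read (attribute names assumed from the Soar manual rather than from this file):

// Assumed working-memory layout read by rl_tabulate_reward_value_for_goal:
//   (<s>  ^reward-link <rl>)    // goal->id.reward_header
//   (<rl> ^reward <r1>)         // slot located via my_agent->rl_sym_reward
//   (<r1> ^value 1.5)           // slot located via my_agent->rl_sym_value
//   (<rl> ^reward <r2>)
//   (<r2> ^value -0.2)
// With these WMEs the nested loops accumulate reward = 1.5 + (-0.2) = 1.3,
// which is then scaled by discount_rate^effective_age and added to data->reward.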
void rl_tabulate_reward_values ( agent *my_agent )
void rl_update_template_tracking ( agent *my_agent, const char *rule_name )

Definition at line 434 of file reinforcement_learning.cpp.

References rl_get_template_id(), and agent_struct::rl_template_count.

Referenced by make_production().

{
int new_id = rl_get_template_id( rule_name );
if ( ( new_id != -1 ) && ( new_id > my_agent->rl_template_count ) )
my_agent->rl_template_count = ( new_id + 1 );
}
bool rl_valid_rule ( production *prod )

Definition at line 331 of file reinforcement_learning.cpp.

References production_struct::action_list, MAKE_ACTION, action_struct::next, and NUMERIC_INDIFFERENT_PREFERENCE_TYPE.

Referenced by make_production(), and reteload_node_and_children().

{
bool numeric_pref = false;
int num_actions = 0;
for ( action *a = prod->action_list; a; a = a->next )
{
num_actions++;
if ( a->type == MAKE_ACTION )
{
if ( a->preference_type == NUMERIC_INDIFFERENT_PREFERENCE_TYPE )
numeric_pref = true;
}
}
return ( numeric_pref && ( num_actions == 1 ) );
}
bool rl_valid_template ( production *prod )

Definition at line 304 of file reinforcement_learning.cpp.

References production_struct::action_list, BINARY_INDIFFERENT_PREFERENCE_TYPE, identifier_struct::common_symbol_info, symbol_union::id, MAKE_ACTION, action_struct::next, NUMERIC_INDIFFERENT_PREFERENCE_TYPE, rhs_value_is_symbol(), rhs_value_to_symbol(), symbol_common_data_struct::symbol_type, and VARIABLE_SYMBOL_TYPE.

Referenced by parse_production().

{
bool numeric_pref = false;
bool var_pref = false;
int num_actions = 0;
for ( action *a = prod->action_list; a; a = a->next )
{
num_actions++;
if ( a->type == MAKE_ACTION )
{
if ( a->preference_type == NUMERIC_INDIFFERENT_PREFERENCE_TYPE )
{
numeric_pref = true;
}
else if ( a->preference_type == BINARY_INDIFFERENT_PREFERENCE_TYPE )
{
if ( rhs_value_is_symbol( a->referent ) && ( rhs_value_to_symbol( a->referent )->common.symbol_type == VARIABLE_SYMBOL_TYPE ) )
var_pref = true;
}
}
}
return ( ( num_actions == 1 ) && ( numeric_pref || var_pref ) );
}
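For context, a template accepted by this check has a single RHS action making either a numeric indifferent preference or a binary indifferent preference whose referent is a variable. A sketch of the latter form, as a hypothetical rule using the :template flag described in the Soar manual:

// Hypothetical Soar template rule (shown as a comment; syntax per the Soar manual):
//   sp {my*template
//      :template
//      (state <s> ^operator <o> + ^value <v>)
//   -->
//      (<s> ^operator <o> = <v>)}
// The single action asserts a binary indifferent preference whose referent <v> is a
// variable, so rl_valid_template returns true; rl_build_template_instantiation later
// grounds <v> to generate concrete rl*my*template*N rules.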
void rl_watkins_clear ( agent *my_agent, Symbol *goal )