1 #include <portability.h>
46 static std::vector<std::pair<std::string, param_accessor<double> *> > documentation_params;
47 static bool initted =
false;
53 documentation_params.push_back(std::make_pair(
"delta-bar-delta-h",
new rl_dbd_h_accessor()));
55 return documentation_params;
146 if ( new_value !=
value )
163 if (
value != new_value )
190 template <
typename T>
193 template <
typename T>
242 for ( rl_rule_list::iterator p=rules->begin(); p!=rules->end(); p++ )
250 rules->remove( prod );
257 for ( rl_rule_list::iterator p=rules->begin(); p!=rules->end(); p++ )
259 (*p)->rl_ref_count--;
294 state->id.rl_info->eligibility_traces->erase( prod );
306 bool numeric_pref =
false;
307 bool var_pref =
false;
327 return ( ( num_actions == 1 ) && ( numeric_pref || var_pref ) );
333 bool numeric_pref =
false;
346 return ( numeric_pref && ( num_actions == 1 ) );
// Walk every (name, accessor) pair registered in documentation_params and
// look inside `doc` for a token of the form "<name>=<value>;".
357 for (std::vector<std::pair<std::string,
param_accessor<double> *> >::const_iterator doc_params_it = documentation_params.begin();
358 doc_params_it != documentation_params.end(); ++doc_params_it) {
// Bind by reference to the stored name; no copy is made.
359 const std::string &param_name = doc_params_it->first;
// Build the search key "<name>=".
361 std::stringstream param_name_ss;
362 param_name_ss << param_name <<
"=";
363 std::string search_term = param_name_ss.str();
// Skip this parameter when the key does not occur in doc.
364 size_t begin_index = doc.find(search_term);
365 if (begin_index == std::string::npos)
continue;
// Advance past the key so begin_index addresses the first value character.
366 begin_index += search_term.size();
// The value ends at the next ';'; skip the parameter when none follows.
367 size_t end_index = doc.find(
";", begin_index);
368 if (end_index == std::string::npos)
continue;
// std::string::substr takes (pos, count), not (begin, end): the count is the
// distance from the value start to the ';', so the terminator and anything
// after it stay out of the extracted value.
369 std::string param_value_str = doc.substr(begin_index, end_index - begin_index);
// NOTE(review): `accessor` is declared outside the lines visible here;
// presumably it is doc_params_it->second — confirm against the full file.
370 accessor->
set_param(prod, param_value_str);
397 std::string temp = prod_name;
400 if ( temp.length() < 6 )
404 if ( temp.compare( 0, 3,
"rl*" ) )
408 std::string::size_type last_star = temp.find_last_of(
'*' );
409 if ( last_star == std::string::npos )
413 if ( last_star == ( temp.length() - 1 ) )
417 std::string id_str = temp.substr( last_star + 1 );
423 from_string(
id, id_str );
458 constants->insert( std::make_pair< Symbol*, Symbol* >( p_sym, i_sym ) );
471 rl_get_symbol_constant( *(reinterpret_cast<Symbol**>( p_test )), *(reinterpret_cast<Symbol**>( i_test )), constants );
534 p_cond = p_cond->
next;
535 i_cond = i_cond->
next;
542 Symbol* return_val = NULL;
572 if ( ins_result.second )
578 double init_value = 0;
583 std::string new_name =
"";
584 std::string empty_string =
"";
590 to_string( new_id, temp_id );
591 new_name = (
"rl*" + empty_string + my_template->
name->
sc.
name +
"*" + temp_id );
627 init_value =
static_cast< double >( referent->
ic.
value );
631 init_value = referent->
fc.
value;
634 new_production->
rl_ecr = 0.0;
635 new_production->
rl_efr = init_value;
644 new_name_symbol = NULL;
648 return_val = new_name_symbol;
661 allocate_with_pool( my_agent, &my_agent->
action_pool, &rhs );
700 for (
condition *cond = all_conds; cond !=
NIL; cond = cond->next )
757 unsigned int effective_age = data->
hrl_age;
759 effective_age += data->
gap_age;
762 data->
reward += ( reward * pow( discount_rate, static_cast< double >( effective_age ) ) );
798 unsigned int just_fired = 0;
801 if ( ( op == pref->value ) && pref->inst->prod->rl_rule )
825 print( my_agent, buf );
853 if ( !using_gaps || op_rl )
859 rl_et_map::iterator iter;
867 unsigned int effective_age = data->
hrl_age + 1;
869 effective_age += data->
gap_age;
872 double discount = pow( gamma, static_cast< double >( effective_age ) );
880 print( my_agent, buf );
896 iter->second *= lambda;
897 iter->second *= discount;
898 if ( iter->second < tolerance )
910 double sum_old_ecr = 0.0;
911 double sum_old_efr = 0.0;
914 double trace_increment = ( 1.0 /
static_cast<double>( data->
prev_op_rl_rules->size() ) );
915 rl_rule_list::iterator p;
919 sum_old_ecr += (*p)->rl_ecr;
920 sum_old_efr += (*p)->rl_efr;
926 iter->second += trace_increment;
937 double old_ecr, old_efr;
938 double delta_ecr, delta_efr;
939 double new_combined, new_ecr, new_efr;
940 double delta_t = (data->
reward + discount * op_value) - (sum_old_ecr + sum_old_efr);
952 double adjusted_alpha;
967 double decay_term = 1.0 - adjusted_alpha * 1.0 * 1.0;
968 if (decay_term < 0.0) decay_term = 0.0;
974 adjusted_alpha = alpha;
979 delta_ecr = ( adjusted_alpha * iter->second * ( data->
reward - sum_old_ecr ) );
983 delta_efr = ( adjusted_alpha * iter->second * ( ( discount * op_value ) - sum_old_efr ) );
991 new_ecr = ( old_ecr + delta_ecr );
992 new_efr = ( old_efr + delta_efr );
993 new_combined = ( new_ecr + new_efr );
998 std::ostringstream ss;
999 ss <<
"RL update " << prod->
name->
sc.
name <<
" "
1000 << old_ecr <<
" " << old_efr <<
" " << old_ecr + old_efr <<
" -> "
1001 << new_ecr <<
" " << new_efr <<
" " << new_combined ;
1003 std::string temp_str( ss.str() );
1004 print( my_agent,
"%s\n", temp_str.c_str() );
1009 if (!log_path.empty()) {
1010 std::ofstream file(log_path.c_str(), std::ios_base::app);
1011 file << ss.str() << std::endl;
1030 std::stringstream doc_ss;
1032 for (std::vector<std::pair<std::string,
param_accessor<double> *> >::const_iterator doc_params_it = documentation_params.begin();
1033 doc_params_it != documentation_params.end(); ++doc_params_it) {
1034 doc_ss << doc_params_it->first <<
"=" << doc_params_it->second->get_param(prod) <<
";";