1 #include <portability.h>
29 #include "soar_TraceNames.h"
36 using namespace soar_TraceNames;
57 if ( !strcmp( policy_name,
"boltzmann" ) )
59 if ( !strcmp( policy_name,
"epsilon-greedy" ) )
61 if ( !strcmp( policy_name,
"first" ) )
63 if ( !strcmp( policy_name,
"last" ) )
65 if ( !strcmp( policy_name,
"random-uniform" ) )
67 if ( !strcmp( policy_name,
"softmax" ) )
78 return "epsilon-greedy";
84 return "random-uniform";
130 newbie->
value = value;
195 return value >= 0 && value <= 1;
283 if ( reduction_rate != 1 )
294 if ( current_value > 0 && reduction_rate != 0.0 )
306 if ( !strcmp( policy_name,
"exponential" ) )
308 if ( !strcmp( policy_name,
"linear" ) )
317 return "exponential";
435 return reduction_rate >= 0 && reduction_rate <= 1;
443 return reduction_rate >= 0;
508 const bool my_rl_enabled =
rl_enabled( my_agent );
525 if ( cand->numeric_value > top_value )
527 top_value = cand->numeric_value;
528 top_rl = cand->rl_contribution;
533 switch ( exploration_policy )
536 return_val = candidates;
593 switch(exploration_policy)
596 return candidates == selection ? 1.0f : 0.0f;
603 unsigned int cand_count = 0;
606 return 1.0 / cand_count;
611 unsigned int cand_count = 0;
612 double total_probability = 0.0;
615 if(cand->numeric_value > 0)
616 total_probability += cand->numeric_value;
619 if(total_probability > 0) {
626 return 1.0 / cand_count;
634 unsigned int top_count = 0;
635 unsigned int cand_count = 0;
638 if(cand->numeric_value > top_value) {
639 top_value = cand->numeric_value;
642 else if(cand->numeric_value == top_value)
646 double retval = epsilon / cand_count;
648 retval += (1.0 - epsilon) / top_count;
658 if(maxq < cand->numeric_value)
659 maxq = cand->numeric_value;
662 double exptotal = 0.0;
663 double expselection = 0.0;
666 double v = exp((cand->numeric_value - maxq) / t);
668 if(cand == selection)
672 return expselection / exptotal;
686 unsigned int cand_count = 0;
705 double total_probability = 0.0;
707 if ( cand->numeric_value > 0 )
711 if ( total_probability == 0.0 )
715 const double selected_probability = total_probability *
SoarRand();
718 double current_sum = 0.0;
721 if ( cand->numeric_value > 0 )
723 current_sum += cand->numeric_value;
724 if ( selected_probability <= current_sum )
761 if (maxq < c->numeric_value)
765 double exptotal = 0.0;
766 std::list<double> expvals;
767 std::list<double>::iterator i;
772 expvals.push_back(v);
779 for (c = candidates, i = expvals.begin(); c; c = c->
next_candidate, i++)
781 double prob = *i / exptotal;
786 xml_att_val( my_agent, kCandidateType, kCandidateTypeSum );
796 for (c = candidates, i = expvals.begin(); c; c = c->
next_candidate, i++) {
817 print( my_agent,
"Value (Sum) = %f", cand->numeric_value );
819 xml_att_val( my_agent, kCandidateName, cand->value );
820 xml_att_val( my_agent, kCandidateType, kCandidateTypeSum );
821 xml_att_val( my_agent, kCandidateValue, cand->numeric_value );
839 int num_max_cand = 0;
843 if ( cand->numeric_value > top_value )
845 top_value = cand->numeric_value;
849 else if ( cand->numeric_value == top_value )
853 if ( num_max_cand == 1 )
890 if ( cand->
value == pref->value )
895 if ( pref->inst->prod->rl_rule )
905 if (cand->
value == pref->value)