Soar Kernel  9.3.2 08-06-12
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
exploration.cpp
Go to the documentation of this file.
1 #include <portability.h>
2 
3 /*************************************************************************
4  * PLEASE SEE THE FILE "license.txt" (INCLUDED WITH THIS SOFTWARE PACKAGE)
5  * FOR LICENSE AND COPYRIGHT INFORMATION.
6  *************************************************************************/
7 
8 /*************************************************************************
9  *
10  * file: exploration.cpp
11  *
12  * =======================================================================
13  * Description : Various functions for exploration
14  * =======================================================================
15  */
16 
17 #include "exploration.h"
18 
19 #include <stdlib.h>
20 #include <math.h>
21 #include <float.h>
22 #include <list>
23 #include <limits>
24 
25 #include "agent.h"
26 #include "soar_rand.h"
27 #include "xml.h"
28 #include "print.h"
29 #include "soar_TraceNames.h"
30 #include "gsysparam.h"
31 #include "reinforcement_learning.h"
32 #include "misc.h"
33 #include "utilities.h"
34 #include "instantiations.h"
35 
36 using namespace soar_TraceNames;
37 
38 /***************************************************************************
39  * Function : exploration_valid_policy
40  **************************************************************************/
41 
42 bool exploration_valid_policy( const char *policy_name )
43 {
44  return exploration_convert_policy( policy_name ) != 0;
45 }
46 
47 bool exploration_valid_policy( const int policy )
48 {
49  return policy > 0 && policy < USER_SELECT_INVALID;
50 }
51 
52 /***************************************************************************
53  * Function : exploration_convert_policy
54  **************************************************************************/
55 const int exploration_convert_policy( const char *policy_name )
56 {
57  if ( !strcmp( policy_name, "boltzmann" ) )
58  return USER_SELECT_BOLTZMANN;
59  if ( !strcmp( policy_name, "epsilon-greedy" ) )
60  return USER_SELECT_E_GREEDY;
61  if ( !strcmp( policy_name, "first" ) )
62  return USER_SELECT_FIRST;
63  if ( !strcmp( policy_name, "last" ) )
64  return USER_SELECT_LAST;
65  if ( !strcmp( policy_name, "random-uniform" ) )
66  return USER_SELECT_RANDOM;
67  if ( !strcmp( policy_name, "softmax" ) )
68  return USER_SELECT_SOFTMAX;
69 
70  return 0;
71 }
72 
73 const char *exploration_convert_policy( const int policy )
74 {
75  if ( policy == USER_SELECT_BOLTZMANN )
76  return "boltzmann";
77  if ( policy == USER_SELECT_E_GREEDY )
78  return "epsilon-greedy";
79  if ( policy == USER_SELECT_FIRST )
80  return "first";
81  if ( policy == USER_SELECT_LAST )
82  return "last";
83  if ( policy == USER_SELECT_RANDOM )
84  return "random-uniform";
85  if ( policy == USER_SELECT_SOFTMAX )
86  return "softmax";
87 
88  return NULL;
89 }
90 
91 /***************************************************************************
92  * Function : exploration_set_policy
93  **************************************************************************/
94 bool exploration_set_policy( agent *my_agent, const char *policy_name )
95 {
96  const int policy = exploration_convert_policy( policy_name );
97 
98  if ( policy )
99  return exploration_set_policy( my_agent, policy );
100 
101  return false;
102 }
103 
104 bool exploration_set_policy( agent *my_agent, const int policy )
105 {
106  if ( exploration_valid_policy( policy ) )
107  {
108  set_sysparam( my_agent, USER_SELECT_MODE_SYSPARAM, policy );
109  return true;
110  }
111 
112  return false;
113 }
114 
115 /***************************************************************************
116  * Function : exploration_get_policy
117  **************************************************************************/
118 const int exploration_get_policy( agent *my_agent )
119 {
120  return static_cast<int>(my_agent->sysparams[ USER_SELECT_MODE_SYSPARAM ]);
121 }
122 
123 /***************************************************************************
124  * Function : exploration_add_parameter
125  **************************************************************************/
126 exploration_parameter *exploration_add_parameter( double value, bool (*val_func)( double ), const char *name )
127 {
128  // new parameter entry
129  exploration_parameter * const newbie = new exploration_parameter;
130  newbie->value = value;
131  newbie->name = name;
133  newbie->val_func = val_func;
135  newbie->rates[ EXPLORATION_REDUCTION_LINEAR ] = 0;
136 
137  return newbie;
138 }
139 
140 /***************************************************************************
141  * Function : exploration_convert_parameter
142  **************************************************************************/
143 const int exploration_convert_parameter( agent *my_agent, const char *name )
144 {
145  for ( int i = 0; i < EXPLORATION_PARAMS; ++i )
146  if ( !strcmp( name, my_agent->exploration_params[ i ]->name ) )
147  return i;
148 
149  return EXPLORATION_PARAMS;
150 }
151 
152 const char *exploration_convert_parameter( agent *my_agent, const int parameter )
153 {
154  return ( parameter >= 0 && parameter < EXPLORATION_PARAMS ) ? my_agent->exploration_params[ parameter ]->name : NULL ;
155 }
156 
157 /***************************************************************************
158  * Function : exploration_valid_parameter
159  **************************************************************************/
160 const bool exploration_valid_parameter( agent *my_agent, const char *name )
161 {
162  return exploration_convert_parameter( my_agent, name ) != EXPLORATION_PARAMS;
163 }
164 
165 const bool exploration_valid_parameter( agent *my_agent, const int parameter )
166 {
167  return exploration_convert_parameter( my_agent, parameter ) != NULL;
168 }
169 
170 /***************************************************************************
171  * Function : exploration_get_parameter_value
172  **************************************************************************/
173 double exploration_get_parameter_value( agent *my_agent, const char *parameter )
174 {
175  const int param = exploration_convert_parameter( my_agent, parameter );
176  if ( param == EXPLORATION_PARAMS )
177  return 0;
178 
179  return my_agent->exploration_params[ param ]->value;
180 }
181 
182 double exploration_get_parameter_value( agent *my_agent, const int parameter )
183 {
184  if ( exploration_valid_parameter( my_agent, parameter ) )
185  return my_agent->exploration_params[ parameter ]->value;
186 
187  return 0;
188 }
189 
190 /***************************************************************************
191  * Function : exploration_validate_epsilon
192  **************************************************************************/
// Accepts values in the closed interval [0, 1].
bool exploration_validate_epsilon( double value )
{
    if ( value < 0 )
        return false;

    // a NaN fails this comparison too, so it is rejected
    return value <= 1;
}
197 
198 /***************************************************************************
199  * Function : exploration_validate_temperature
200  **************************************************************************/
202 {
203  return value > 0;
204 }
205 
206 /***************************************************************************
207  * Function : exploration_valid_parameter_value
208  **************************************************************************/
209 bool exploration_valid_parameter_value( agent *my_agent, const char *name, double value )
210 {
211  const int param = exploration_convert_parameter( my_agent, name );
212  if ( param == EXPLORATION_PARAMS )
213  return false;
214 
215  return my_agent->exploration_params[ param ]->val_func( value );
216 }
217 
218 bool exploration_valid_parameter_value( agent *my_agent, const int parameter, double value )
219 {
220  if ( exploration_valid_parameter( my_agent, parameter ) )
221  return my_agent->exploration_params[ parameter ]->val_func( value );
222 
223  return false;
224 }
225 
226 /***************************************************************************
227  * Function : exploration_set_parameter_value
228  **************************************************************************/
229 bool exploration_set_parameter_value( agent *my_agent, const char *name, double value )
230 {
231  const int param = exploration_convert_parameter( my_agent, name );
232  if ( param == EXPLORATION_PARAMS )
233  return false;
234 
235  my_agent->exploration_params[ param ]->value = value;
236 
237  return true;
238 }
239 
240 bool exploration_set_parameter_value( agent *my_agent, const int parameter, double value )
241 {
242  if ( exploration_valid_parameter( my_agent, parameter ) )
243  {
244  my_agent->exploration_params[ parameter ]->value = value;
245  return true;
246  }
247  else
248  return false;
249 }
250 
251 /***************************************************************************
252  * Function : exploration_get_auto_update
253  **************************************************************************/
255 {
256  return my_agent->sysparams[ USER_SELECT_REDUCE_SYSPARAM ] != FALSE;
257 }
258 
259 /***************************************************************************
260  * Function : exploration_set_auto_update
261  **************************************************************************/
262 bool exploration_set_auto_update( agent *my_agent, bool setting )
263 {
264  my_agent->sysparams[ USER_SELECT_REDUCE_SYSPARAM ] = setting ? TRUE : FALSE;
265 
266  return true;
267 }
268 
269 /***************************************************************************
270  * Function : exploration_update_parameters
271  **************************************************************************/
273 {
274  if ( exploration_get_auto_update( my_agent ) )
275  {
276  for ( int i = 0; i < EXPLORATION_PARAMS; ++i )
277  {
278  const int reduction_policy = exploration_get_reduction_policy( my_agent, i );
279  const double reduction_rate = exploration_get_reduction_rate( my_agent, i, reduction_policy );
280 
281  if ( reduction_policy == EXPLORATION_REDUCTION_EXPONENTIAL )
282  {
283  if ( reduction_rate != 1 )
284  {
285  const double current_value = exploration_get_parameter_value( my_agent, i );
286 
287  exploration_set_parameter_value( my_agent, i, current_value * reduction_rate );
288  }
289  }
290  else if ( reduction_policy == EXPLORATION_REDUCTION_LINEAR )
291  {
292  const double current_value = exploration_get_parameter_value( my_agent, i );
293 
294  if ( current_value > 0 && reduction_rate != 0.0 )
295  exploration_set_parameter_value( my_agent, i, (current_value - reduction_rate > 0) ? (current_value - reduction_rate) : 0 );
296  }
297  }
298  }
299 }
300 
301 /***************************************************************************
302  * Function : exploration_convert_reduction_policy
303  **************************************************************************/
304 const int exploration_convert_reduction_policy( const char *policy_name )
305 {
306  if ( !strcmp( policy_name, "exponential" ) )
308  if ( !strcmp( policy_name, "linear" ) )
310 
311  return EXPLORATION_REDUCTIONS;
312 }
313 
314 const char *exploration_convert_reduction_policy( const int policy )
315 {
316  if ( policy == EXPLORATION_REDUCTION_EXPONENTIAL )
317  return "exponential";
318  if ( policy == EXPLORATION_REDUCTION_LINEAR )
319  return "linear";
320 
321  return NULL;
322 }
323 
324 /***************************************************************************
325  * Function : exploration_get_reduction_policy
326  **************************************************************************/
327 const int exploration_get_reduction_policy( agent *my_agent, const char *parameter )
328 {
329  const int param = exploration_convert_parameter( my_agent, parameter );
330  if ( param == EXPLORATION_PARAMS )
331  return EXPLORATION_REDUCTIONS;
332 
333  return my_agent->exploration_params[ param ]->reduction_policy;
334 }
335 
336 const int exploration_get_reduction_policy( agent *my_agent, const int parameter )
337 {
338  if ( exploration_valid_parameter( my_agent, parameter ) )
339  return my_agent->exploration_params[ parameter ]->reduction_policy;
340  else
341  return EXPLORATION_REDUCTIONS;
342 }
343 
344 /***************************************************************************
345  * Function : exploration_valid_reduction_policy
346  **************************************************************************/
347 bool exploration_valid_reduction_policy( agent * /*my_agent*/, const char * /*parameter*/, const char *policy_name )
348 {
350 }
351 
352 bool exploration_valid_reduction_policy( agent * /*my_agent*/, const char * /*parameter*/, const int policy )
353 {
354  return exploration_convert_reduction_policy( policy ) != NULL;
355 }
356 
357 bool exploration_valid_reduction_policy( agent * /*my_agent*/, const int /*parameter*/, const int policy )
358 {
359  return exploration_convert_reduction_policy( policy ) != NULL;
360 }
361 
362 /***************************************************************************
363  * Function : exploration_set_reduction_policy
364  **************************************************************************/
365 bool exploration_set_reduction_policy( agent *my_agent, const char *parameter, const char *policy_name )
366 {
367  const int param = exploration_convert_parameter( my_agent, parameter );
368  if ( param == EXPLORATION_PARAMS )
369  return false;
370 
371  const int policy = exploration_convert_reduction_policy( policy_name );
372  if ( policy == EXPLORATION_REDUCTIONS )
373  return false;
374 
375  my_agent->exploration_params[ param ]->reduction_policy = policy;
376 
377  return true;
378 }
379 
380 bool exploration_set_reduction_policy( agent *my_agent, const int parameter, const int policy )
381 {
382  if ( exploration_valid_parameter( my_agent, parameter ) &&
383  exploration_valid_reduction_policy( my_agent, parameter, policy ) )
384  {
385  my_agent->exploration_params[ parameter ]->reduction_policy = policy;
386  return true;
387  }
388 
389  return false;
390 }
391 
392 /***************************************************************************
393  * Function : exploration_valid_reduction_rate
394  **************************************************************************/
395 bool exploration_valid_reduction_rate( agent *my_agent, const char *parameter, const char *policy_name, double reduction_rate )
396 {
397  const int param = exploration_convert_parameter( my_agent, parameter );
398  if ( param == EXPLORATION_PARAMS )
399  return false;
400 
401  const int policy = exploration_convert_reduction_policy( policy_name );
402  if ( policy == EXPLORATION_REDUCTIONS )
403  return false;
404 
405  return exploration_valid_reduction_rate( my_agent, param, policy, reduction_rate );
406 }
407 
408 bool exploration_valid_reduction_rate( agent *my_agent, const int parameter, const int policy, double reduction_rate )
409 {
410  if ( !exploration_valid_reduction_policy( my_agent, parameter, policy ) )
411  return false;
412 
413  switch ( policy )
414  {
416  return exploration_valid_exponential( reduction_rate );
417  break;
418 
420  return exploration_valid_linear( reduction_rate );
421  break;
422 
423  default:
424  break;
425  }
426 
427  return false;
428 }
429 
430 /***************************************************************************
431  * Function : exploration_valid_exponential
432  **************************************************************************/
// An exponential reduction rate must lie in the closed interval [0, 1].
bool exploration_valid_exponential( double reduction_rate )
{
    if ( reduction_rate < 0 )
        return false;

    // a NaN fails this comparison too, so it is rejected
    return reduction_rate <= 1;
}
437 
438 /***************************************************************************
439  * Function : exploration_valid_linear
440  **************************************************************************/
// A linear reduction rate must be non-negative.
bool exploration_valid_linear( double reduction_rate )
{
    const bool non_negative = ( 0 <= reduction_rate );

    return non_negative;
}
445 
446 /***************************************************************************
447  * Function : exploration_get_reduction_rate
448  **************************************************************************/
449 double exploration_get_reduction_rate( agent *my_agent, const char *parameter, const char *policy_name )
450 {
451  const int param = exploration_convert_parameter( my_agent, parameter );
452  if ( param == EXPLORATION_PARAMS )
453  return 0;
454 
455  const int policy = exploration_convert_reduction_policy( policy_name );
456  if ( policy == EXPLORATION_REDUCTIONS )
457  return 0;
458 
459  return exploration_get_reduction_rate( my_agent, param, policy );
460 }
461 
462 double exploration_get_reduction_rate( agent *my_agent, const int parameter, const int policy )
463 {
464  if ( exploration_valid_parameter( my_agent, parameter ) &&
465  exploration_valid_reduction_policy( my_agent, parameter, policy ) )
466  return my_agent->exploration_params[ parameter ]->rates[ policy ];
467 
468  return 0;
469 }
470 
471 /***************************************************************************
472  * Function : exploration_set_reduction_rate
473  **************************************************************************/
474 bool exploration_set_reduction_rate( agent *my_agent, const char *parameter, const char *policy_name, double reduction_rate )
475 {
476  const int param = exploration_convert_parameter( my_agent, parameter );
477  if ( param == EXPLORATION_PARAMS )
478  return false;
479 
480  const int policy = exploration_convert_reduction_policy( policy_name );
481  if ( policy == EXPLORATION_REDUCTIONS )
482  return false;
483 
484  return exploration_set_reduction_rate( my_agent, param, policy, reduction_rate );
485 }
486 
487 bool exploration_set_reduction_rate( agent *my_agent, const int parameter, const int policy, double reduction_rate )
488 {
489  if ( exploration_valid_parameter( my_agent, parameter ) &&
490  exploration_valid_reduction_policy( my_agent, parameter, policy ) &&
491  exploration_valid_reduction_rate( my_agent, parameter, policy, reduction_rate ) )
492  {
493  my_agent->exploration_params[ parameter ]->rates[ policy ] = reduction_rate;
494  return true;
495  }
496 
497  return false;
498 }
499 
500 /***************************************************************************
501  * Function : exploration_choose_according_to_policy
502  **************************************************************************/
504 {
505  const int exploration_policy = exploration_get_policy( my_agent );
506  preference *return_val = NULL;
507 
508  const bool my_rl_enabled = rl_enabled( my_agent );
509 
510  const rl_param_container::learning_choices my_learning_policy = my_rl_enabled ? my_agent->rl_params->learning_policy->get_value() : rl_param_container::q;
511 
512  // get preference values for each candidate
513  // see soar_ecPrintPreferences
514  for ( preference *cand = candidates; cand; cand = cand->next_candidate )
515  exploration_compute_value_of_candidate( my_agent, cand, s );
516 
517  double top_value = candidates->numeric_value;
518  bool top_rl = candidates->rl_contribution;
519 
520  // should find highest valued candidate in q-learning
521  if ( my_rl_enabled && my_learning_policy == rl_param_container::q )
522  {
523  for ( const preference * cand = candidates; cand; cand = cand->next_candidate )
524  {
525  if ( cand->numeric_value > top_value )
526  {
527  top_value = cand->numeric_value;
528  top_rl = cand->rl_contribution;
529  }
530  }
531  }
532 
533  switch ( exploration_policy )
534  {
535  case USER_SELECT_FIRST:
536  return_val = candidates;
537  break;
538 
539  case USER_SELECT_LAST:
540  for ( return_val = candidates; return_val->next_candidate; return_val = return_val->next_candidate );
541  break;
542 
543  case USER_SELECT_RANDOM:
544  return_val = exploration_randomly_select( candidates );
545  break;
546 
547  case USER_SELECT_SOFTMAX:
548  return_val = exploration_probabilistically_select( candidates );
549  break;
550 
552  return_val = exploration_epsilon_greedy_select( my_agent, candidates );
553  break;
554 
556  return_val = exploration_boltzmann_select( my_agent, candidates );
557  break;
558  }
559 
560  // should perform update here for chosen candidate in sarsa
561  if ( my_rl_enabled )
562  {
563  rl_tabulate_reward_values( my_agent );
564 
565  if ( my_learning_policy == rl_param_container::sarsa )
566  {
567  rl_perform_update( my_agent, return_val->numeric_value, return_val->rl_contribution, s->id );
568  }
569  else if ( my_learning_policy == rl_param_container::q )
570  {
571  rl_perform_update( my_agent, top_value, top_rl, s->id );
572 
573  if ( return_val->numeric_value != top_value )
574  rl_watkins_clear( my_agent, s->id );
575  }
576  }
577 
578  return return_val;
579 }
580 
581 /***************************************************************************
582  * Function : exploration_probability_according_to_policy, bazald
583  **************************************************************************/
584 double exploration_probability_according_to_policy( agent *my_agent, slot *s, preference *candidates, preference *selection )
585 {
586  const int exploration_policy = exploration_get_policy(my_agent);
587 
588  // get preference values for each candidate
589  // see soar_ecPrintPreferences
590  for(preference *cand = candidates; cand; cand = cand->next_candidate)
591  exploration_compute_value_of_candidate(my_agent, cand, s);
592 
593  switch(exploration_policy)
594  {
595  case USER_SELECT_FIRST:
596  return candidates == selection ? 1.0f : 0.0f;
597 
598  case USER_SELECT_LAST:
599  return selection->next_candidate ? 0.0f: 1.0f;
600 
601  case USER_SELECT_RANDOM:
602  {
603  unsigned int cand_count = 0;
604  for(const preference * cand = candidates; cand; cand = cand->next_candidate)
605  ++cand_count;
606  return 1.0 / cand_count;
607  }
608 
609  case USER_SELECT_SOFTMAX:
610  {
611  unsigned int cand_count = 0;
612  double total_probability = 0.0;
613  for(const preference *cand = candidates; cand; cand = cand->next_candidate) {
614  ++cand_count;
615  if(cand->numeric_value > 0)
616  total_probability += cand->numeric_value;
617  }
618 
619  if(total_probability > 0) {
620  if(selection->numeric_value > 0)
621  return selection->numeric_value / total_probability;
622  else
623  return 0.0;
624  }
625  else
626  return 1.0 / cand_count;
627  }
628 
630  {
631  const double epsilon = exploration_get_parameter_value(my_agent, EXPLORATION_PARAM_EPSILON);
632 
633  double top_value = candidates->numeric_value;
634  unsigned int top_count = 0;
635  unsigned int cand_count = 0;
636  for(const preference * cand = candidates; cand; cand = cand->next_candidate) {
637  ++cand_count;
638  if(cand->numeric_value > top_value) {
639  top_value = cand->numeric_value;
640  top_count = 1;
641  }
642  else if(cand->numeric_value == top_value)
643  ++top_count;
644  }
645 
646  double retval = epsilon / cand_count;
647  if(selection->numeric_value == top_value)
648  retval += (1.0 - epsilon) / top_count;
649  return retval;
650  }
651 
653  {
655 
656  double maxq = candidates->numeric_value;
657  for(preference *cand = candidates->next_candidate; cand; cand = cand->next_candidate) {
658  if(maxq < cand->numeric_value)
659  maxq = cand->numeric_value;
660  }
661 
662  double exptotal = 0.0;
663  double expselection = 0.0;
664  for(preference *cand = candidates; cand; cand = cand->next_candidate) {
665  // equivalent to exp((cand->numeric_value / t) - (maxq / t)) but safer against overflow
666  double v = exp((cand->numeric_value - maxq) / t);
667  exptotal += v;
668  if(cand == selection)
669  expselection = v;
670  }
671 
672  return expselection / exptotal;
673  }
674 
675  default:
676  abort();
677  return 0.0;
678  }
679 }
680 
681 /***************************************************************************
682  * Function : exploration_randomly_select
683  **************************************************************************/
685 {
686  unsigned int cand_count = 0;
687  for ( const preference * cand = candidates; cand; cand = cand->next_candidate )
688  ++cand_count;
689 
690  preference * cand = candidates;
691  for( uint32_t chosen_num = SoarRandInt( cand_count - 1 ); chosen_num; --chosen_num )
692  cand = cand->next_candidate;
693 
694  return cand;
695 }
696 
697 /***************************************************************************
698  * Function : exploration_probabilistically_select
699  **************************************************************************/
701 {
702  // IF THIS FUNCTION CHANGES, SEE soar_ecPrintPreferences
703 
704  // count up positive numbers
705  double total_probability = 0.0;
706  for ( const preference *cand = candidates; cand; cand = cand->next_candidate )
707  if ( cand->numeric_value > 0 )
708  total_probability += cand->numeric_value;
709 
710  // if nothing positive, resort to random
711  if ( total_probability == 0.0 )
712  return exploration_randomly_select( candidates );
713 
714  // choose a random preference within the distribution
715  const double selected_probability = total_probability * SoarRand();
716 
717  // select the candidate based upon the chosen preference
718  double current_sum = 0.0;
719  for ( preference *cand = candidates; cand; cand = cand->next_candidate )
720  {
721  if ( cand->numeric_value > 0 )
722  {
723  current_sum += cand->numeric_value;
724  if ( selected_probability <= current_sum )
725  return cand;
726  }
727  }
728 
729  return NIL;
730 }
731 
732 /*
733  * Select a candidate whose Q-value is Q_i with probability
734  *
735  * e^(Q_i / t) / sum(j=1 to n, e^(Q_j / t)).
736  *
737  * Since Q values can get very large or very small (negative values),
738  * overflow and underflow problems can occur when calculating the
739  * exponentials. This is avoided by subtracting a constant k from
740  * all exponent values involved. This doesn't affect the actual
741  * probabilities with which candidates are chosen, because subtracting
742  * a constant from an exponent is equivalent to dividing by the base
743  * raised to that constant, and the divisors cancel out during the
744  * calculation.
745  *
746  * k is chosen to be Q_max / t. This means that the values of all
747  * numerator exponentials are at most 1, and the value of the sum in the
748  * denominator is between 1 and n. This gets rid of the overflow problem
749  * completely, and in the cases where underflow will occur, the actual
750  * probability of the action being considered will be so small (< 10^-300)
751  * that it's negligible.
752  */
754 {
756  double maxq;
757  preference* c;
758 
759  maxq = candidates->numeric_value;
760  for (c = candidates->next_candidate; c; c = c->next_candidate) {
761  if (maxq < c->numeric_value)
762  maxq = c->numeric_value;
763  }
764 
765  double exptotal = 0.0;
766  std::list<double> expvals;
767  std::list<double>::iterator i;
768 
769  for (c = candidates; c; c = c->next_candidate) {
770  // equivalent to exp((c->numeric_value / t) - (maxq / t)) but safer against overflow
771  double v = exp((c->numeric_value - maxq) / t);
772  expvals.push_back(v);
773  exptotal += v;
774  }
775 
776  // output trace information
777  if ( my_agent->sysparams[ TRACE_INDIFFERENT_SYSPARAM ] )
778  {
779  for (c = candidates, i = expvals.begin(); c; c = c->next_candidate, i++)
780  {
781  double prob = *i / exptotal;
782  print_with_symbols( my_agent, "\n Candidate %y: ", c->value );
783  print( my_agent, "Value (Sum) = %f, (Prob) = %f", c->numeric_value, prob );
784  xml_begin_tag( my_agent, kTagCandidate );
785  xml_att_val( my_agent, kCandidateName, c->value );
786  xml_att_val( my_agent, kCandidateType, kCandidateTypeSum );
787  xml_att_val( my_agent, kCandidateValue, c->numeric_value );
788  xml_att_val( my_agent, kCandidateExpValue, prob );
789  xml_end_tag( my_agent, kTagCandidate );
790  }
791  }
792 
793  double r = SoarRand(exptotal);
794  double sum = 0.0;
795 
796  for (c = candidates, i = expvals.begin(); c; c = c->next_candidate, i++) {
797  sum += *i;
798  if (sum >= r)
799  return c;
800  }
801 
802  return NIL;
803 }
804 
805 /***************************************************************************
806  * Function : exploration_epsilon_greedy_select
807  **************************************************************************/
809 {
810  const double epsilon = exploration_get_parameter_value( my_agent, EXPLORATION_PARAM_EPSILON );
811 
812  if ( my_agent->sysparams[ TRACE_INDIFFERENT_SYSPARAM ] )
813  {
814  for ( const preference *cand = candidates; cand; cand = cand->next_candidate )
815  {
816  print_with_symbols( my_agent, "\n Candidate %y: ", cand->value );
817  print( my_agent, "Value (Sum) = %f", cand->numeric_value );
818  xml_begin_tag( my_agent, kTagCandidate );
819  xml_att_val( my_agent, kCandidateName, cand->value );
820  xml_att_val( my_agent, kCandidateType, kCandidateTypeSum );
821  xml_att_val( my_agent, kCandidateValue, cand->numeric_value );
822  xml_end_tag( my_agent, kTagCandidate );
823  }
824  }
825 
826  if ( SoarRand() < epsilon )
827  return exploration_randomly_select( candidates );
828  else
829  return exploration_get_highest_q_value_pref( candidates );
830 }
831 
832 /***************************************************************************
833  * Function : exploration_get_highest_q_value_pref
834  **************************************************************************/
836 {
837  preference *top_cand = candidates;
838  double top_value = candidates->numeric_value;
839  int num_max_cand = 0;
840 
841  for ( preference * cand = candidates; cand; cand = cand->next_candidate )
842  {
843  if ( cand->numeric_value > top_value )
844  {
845  top_value = cand->numeric_value;
846  top_cand = cand;
847  num_max_cand = 1;
848  }
849  else if ( cand->numeric_value == top_value )
850  ++num_max_cand;
851  }
852 
853  if ( num_max_cand == 1 )
854  return top_cand;
855  else
856  {
857  preference *cand = candidates;
858  while ( cand->numeric_value != top_value )
859  cand = cand->next_candidate;
860 
861  // if operators tied for highest Q-value, select among tied set at random
862  for ( uint32_t chosen_num = SoarRandInt( num_max_cand - 1 ); chosen_num; --chosen_num )
863  {
864  cand = cand->next_candidate;
865 
866  while ( cand->numeric_value != top_value )
867  cand = cand->next_candidate;
868  }
869 
870  return cand;
871  }
872 }
873 
874 /***************************************************************************
875  * Function : exploration_compute_value_of_candidate
876  **************************************************************************/
877 void exploration_compute_value_of_candidate( agent *my_agent, preference *cand, slot *s, double default_value )
878 {
879  if ( !cand )
880  return;
881 
882  // initialize candidate values
884  cand->numeric_value = 0;
885  cand->rl_contribution = false;
886 
887  // all numeric indifferents
888  for ( preference *pref = s->preferences[ NUMERIC_INDIFFERENT_PREFERENCE_TYPE ]; pref; pref = pref->next)
889  {
890  if ( cand->value == pref->value )
891  {
893  cand->numeric_value += get_number_from_symbol( pref->referent );
894 
895  if ( pref->inst->prod->rl_rule )
896  {
897  cand->rl_contribution = true;
898  }
899  }
900  }
901 
902  // all binary indifferents
903  for ( preference *pref = s->preferences[ BINARY_INDIFFERENT_PREFERENCE_TYPE ]; pref; pref = pref->next )
904  {
905  if (cand->value == pref->value)
906  {
908  cand->numeric_value += get_number_from_symbol( pref->referent );
909  }
910  }
911 
912  // if no contributors, provide default
913  if ( !cand->total_preferences_for_candidate )
914  {
915  cand->numeric_value = default_value;
917  }
918 
919  // accommodate average mode
922 }