Soar Kernel  9.3.2 08-06-12
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
reinforcement_learning.h
Go to the documentation of this file.
1 /*************************************************************************
2  * PLEASE SEE THE FILE "license.txt" (INCLUDED WITH THIS SOFTWARE PACKAGE)
3  * FOR LICENSE AND COPYRIGHT INFORMATION.
4  *************************************************************************/
5 
6 /*************************************************************************
7  *
8  * file: reinforcement_learning.h
9  *
10  * =======================================================================
11  */
12 
13 #ifndef REINFORCEMENT_LEARNING_H
14 #define REINFORCEMENT_LEARNING_H
15 
16 #include <map>
17 #include <string>
18 #include <list>
19 #include <vector>
20 
21 #include "soar_module.h"
22 
23 #include "chunk.h"
24 #include "production.h"
25 
27 // RL Constants
29 
30 // more specific forms of no change impasse types
31 // made negative to never conflict with impasse constants
// The two codes are distinct: -1 for the state form, -2 for the operator form.
32 #define STATE_NO_CHANGE_IMPASSE_TYPE -1
33 #define OP_NO_CHANGE_IMPASSE_TYPE -2
34 
36 // RL Parameters
38 
39 class rl_learning_param;
40 class rl_apoptosis_param;
42 
// Interface for reading and writing one per-production parameter of type T.
// NOTE(review): the class-head line (listing line 44) is absent from this listing;
// the subclasses below refer to this template as param_accessor<T>.
// NOTE(review): std::istringstream is declared in <sstream>, which this header does
// not include directly - presumably it arrives via soar_module.h; confirm.
43 template <typename T>
45  public:
// Implemented by subclasses to write `value` to the field they expose on `prod`.
46  virtual void set_param(production * const prod, T value) const = 0;
// Implemented by subclasses to read the field they expose on `prod`.
47  virtual T get_param(const production * const prod) const = 0;
// Convenience overload: parses `value_str` with operator>> and forwards the result
// to the typed set_param above.
48  void set_param(production * const prod, std::string value_str) const {
// Value-initialize so that an empty or unparsable string forwards T() instead of an
// indeterminate value: extraction leaves `value` untouched when the stream is
// already at end-of-input.
49  T value = T();
50  std::istringstream iss(value_str);
51  iss >> value;
52  set_param(prod, value);
53  }
54 };
55 
// Accessor that exposes a production's rl_update_count through the
// param_accessor<double> interface. Both overrides are declared without an access
// specifier, so they are private in this class; outside callers must go through the
// base interface.
56 class rl_updates_accessor : public param_accessor<double> {
// Writes `value` to prod->rl_update_count.
// NOTE(review): the field's declared type is not visible here; if it is integral,
// this assignment truncates the double - confirm.
57  virtual void set_param(production * const prod, double value) const {
58  prod->rl_update_count = value;
59  }
// Returns prod->rl_update_count, converted to double.
60  virtual double get_param(const production * const prod) const {
61  return prod->rl_update_count;
62  }
63 };
64 
// Accessor that exposes a production's rl_delta_bar_delta_h through the
// param_accessor<double> interface. Both overrides are declared without an access
// specifier, so they are private in this class; outside callers must go through the
// base interface.
65 class rl_dbd_h_accessor : public param_accessor<double> {
// Writes `value` to prod->rl_delta_bar_delta_h.
66  virtual void set_param(production * const prod, double value) const {
67  prod->rl_delta_bar_delta_h = value;
68  }
// Returns prod->rl_delta_bar_delta_h, converted to double.
69  virtual double get_param(const production * const prod) const {
70  return prod->rl_delta_bar_delta_h;
71  }
72 };
73 
// Body of the RL parameter container (the constructor below names the class
// rl_param_container).
// NOTE(review): the class head and several member declarations (listing lines 74,
// 77, 84, 86, 88-98, 100 and 104-106) are absent from this listing, so only the
// members shown here are documented.
75 {
76  public:
78 
79  // How the learning rate cools over time.
80  // normal_decay: default, same learning rate for each rule
81  // exponential_decay: rate = rate / # updates for this rule
82  // logarithmic_decay: rate = rate / log(# updates for this rule)
83  // Miller, 11/14/2011
85 
87 
99 
101  soar_module::boolean_param *meta; // Whether doc strings are used for storing metadata.
102  soar_module::string_param *update_log_path; // If non-null and size > 0, log all RL updates to this file.
103 
107 
// Constructs the container for `new_agent`.
108  rl_param_container( agent *new_agent );
109 
110  // For writing parameters to a rule's documentation string.
// Returns a const reference to a vector of (string, accessor pointer) pairs.
// NOTE(review): the string is presumably the parameter's name as written in the
// documentation string; ownership of the accessor pointers is not visible here.
111  static const std::vector<std::pair<std::string, param_accessor<double> * > > &get_documentation_params();
112 };
113 
// Body of rl_learning_param (named by the constructor below).
// NOTE(review): the class head (listing line 114) and the protected member
// (listing line 117) are absent from this listing.
115 {
116  protected:
118 
119  public:
// Takes the parameter name, its initial boolean value, a predicate pointer
// (new_prot_pred) and the agent.
120  rl_learning_param( const char *new_name, soar_module::boolean new_value, soar_module::predicate<soar_module::boolean> *new_prot_pred, agent *new_agent );
// NOTE(review): presumably replaces the base parameter's set_value; the base class
// is not visible here - confirm.
121  void set_value( soar_module::boolean new_value );
122 };
123 
// Constant-valued parameter whose value type is rl_param_container::apoptosis_choices.
// NOTE(review): the protected member (listing line 127) and the public members
// (listing lines 130-131) are absent from this listing.
124 class rl_apoptosis_param: public soar_module::constant_param< rl_param_container::apoptosis_choices >
125 {
126  protected:
128 
129  public:
132 };
133 
// Body of rl_apoptosis_thresh_param (named by the constructor below).
// NOTE(review): the class head (listing line 134) is absent from this listing.
135 {
136  public:
// Takes the parameter name, its initial double value and two predicate pointers
// (new_val_pred, new_prot_pred).
137  rl_apoptosis_thresh_param( const char *new_name, double new_value, soar_module::predicate<double> *new_val_pred, soar_module::predicate<double> *new_prot_pred );
// NOTE(review): presumably replaces the base parameter's set_value; the base class
// is not visible here - confirm.
138  void set_value( double new_value );
139 };
140 
// Class template rl_apoptosis_predicate<T> (named by the constructor below): a
// callable that takes a T and returns bool.
// NOTE(review): the class head (listing line 142) is absent from this listing, so
// the base class, if any, is not visible.
141 template <typename T>
143 {
144  public:
// Constructs the predicate for `new_agent`.
145  rl_apoptosis_predicate( agent *new_agent );
// Evaluates the predicate on `val`; the criterion is defined in the implementation.
146  bool operator() ( T val );
147 };
148 
150 // RL Statistics
152 
// Body of rl_stat_container (named by the constructor below).
// NOTE(review): the class head (listing line 153) and the member declarations
// (listing lines 156-158) are absent from this listing.
154 {
155  public:
159 
// Constructs the container for `new_agent`.
160  rl_stat_container( agent *new_agent );
161 };
162 
163 
165 // RL Types
167 
168 // map of eligibility traces
// Keys are production pointers and mapped values are doubles. When
// USE_MEM_POOL_ALLOCATORS is defined the map allocates through
// soar_module::soar_memory_pool_allocator; otherwise it uses the default allocator.
169 #ifdef USE_MEM_POOL_ALLOCATORS
170 typedef std::map< production*, double, std::less< production* >, soar_module::soar_memory_pool_allocator< std::pair< production*, double > > > rl_et_map;
171 #else
172 typedef std::map< production*, double > rl_et_map;
173 #endif
174 
175 // list of rules associated with the last operator
// A std::list of production pointers. When USE_MEM_POOL_ALLOCATORS is defined the
// list allocates through soar_module::soar_memory_pool_allocator; otherwise it uses
// the default allocator.
176 #ifdef USE_MEM_POOL_ALLOCATORS
177 typedef std::list< production*, soar_module::soar_memory_pool_allocator< production* > > rl_rule_list;
178 #else
179 typedef std::list< production* > rl_rule_list;
180 #endif
181 
182 // rl data associated with each state
// Plain aggregate, also available under the typedef name rl_data. The two container
// members are raw pointers; who allocates and frees them is not visible in this
// header.
183 typedef struct rl_data_struct {
184  rl_et_map *eligibility_traces; // traces associated with productions
185  rl_rule_list *prev_op_rl_rules; // rl rules associated with the previous operator
186 
187  double previous_q; // q-value of the previous state
188  double reward; // accumulated discounted reward
189 
190  unsigned int gap_age; // the number of steps since a cycle containing rl rules
191  unsigned int hrl_age; // the number of steps in a subgoal
192 } rl_data;
193 
// rl_symbol_map maps Symbol pointers to Symbol pointers; rl_symbol_map_set is a set
// of such maps.
// NOTE(review): std::set is declared in <set>, which this header does not include
// directly - presumably it arrives via another include; confirm.
194 typedef std::map< Symbol*, Symbol* > rl_symbol_map;
195 typedef std::set< rl_symbol_map > rl_symbol_map_set;
196 
197 // used to manage apoptosis
199 
201 // Maintenance
203 
204 // remove Soar-RL references to productions
// rl_remove_refs_for_prod takes the agent and the production concerned.
205 extern void rl_remove_refs_for_prod( agent *my_agent, production *prod );
// rl_clear_refs takes only a goal symbol.
// NOTE(review): presumably clears the references held for that goal; confirm
// against the implementation.
206 extern void rl_clear_refs( Symbol* goal );
207 
209 // Parameter Get/Set/Validate
211 
212 // shortcut for determining if Soar-RL is enabled
// Returns a bool for the given agent.
213 extern bool rl_enabled( agent *my_agent );
214 
216 // Production Validation
218 
219 // validate template
// Returns a bool for `prod`; the validity criteria live in the implementation.
220 extern bool rl_valid_template( production *prod );
221 
222 // validate rl rule
// Returns a bool for `prod`; the validity criteria live in the implementation.
223 extern bool rl_valid_rule( production *prod );
224 
225 // sets rl meta-data from a production documentation string
226 extern void rl_rule_meta( agent* my_agent, production* prod );
227 
228 // template instantiation
// Takes a production name and returns an int.
// NOTE(review): the value returned when the name carries no template id is not
// visible in this header - confirm.
229 extern int rl_get_template_id( const char *prod_name );
230 
232 // Template Tracking
234 
235 // initializes agent's tracking of template-originated rl-rules
236 extern void rl_initialize_template_tracking( agent *my_agent );
237 
238 // updates the agent's tracking of template-originated rl-rules
// rule_name is the name of the rule to account for.
239 extern void rl_update_template_tracking( agent *my_agent, const char *rule_name );
240 
241 // get the next id for a template (increments internal counter)
// Returns the id as an int.
242 extern int rl_next_template_id( agent *my_agent );
243 
244 // reverts internal counter
// NOTE(review): presumably undoes the increment made by rl_next_template_id; confirm.
245 extern void rl_revert_template_id( agent *my_agent );
246 
248 // Template Behavior
250 
251 // builds a new Soar-RL rule from a template instantiation
// Takes the agent, the template instantiation, a token and a wme, and returns a
// Symbol pointer.
// NOTE(review): what the returned Symbol denotes, and whether it can be null, is
// not visible in this header - confirm.
252 extern Symbol *rl_build_template_instantiation( agent *my_agent, instantiation *my_template_instance, struct token_struct *tok, wme *w );
253 
254 // creates an incredibly simple action
// Returns a pointer to the action. The roles of id_sym, attr_sym, val_sym and
// ref_sym are defined by the implementation, which is not visible in this header.
255 extern action *rl_make_simple_action( agent *my_agent, Symbol *id_sym, Symbol *attr_sym, Symbol *val_sym, Symbol *ref_sym );
256 
257 // adds a test to a condition list for goals or impasses contained within the condition list
// all_conds is the condition list; nothing is returned.
258 extern void rl_add_goal_or_impasse_tests_to_conds(agent *my_agent, condition *all_conds);
259 
261 // Reward
263 
264 // tabulation of a single goal's reward
// Takes the agent and the goal symbol; nothing is returned.
265 extern void rl_tabulate_reward_value_for_goal( agent *my_agent, Symbol *goal );
266 
267 // tabulation of all agent goal reward
// Takes only the agent; nothing is returned.
268 extern void rl_tabulate_reward_values( agent *my_agent );
269 
271 // Updates
273 
274 // Store and update data that will be needed later to perform a Bellman update for the current operator
// Takes the agent, the goal symbol and a preference pointer (cand).
275 extern void rl_store_data( agent *my_agent, Symbol *goal, preference *cand );
276 
277 // update the value of Soar-RL rules
// update_efr defaults to true, so existing four-argument calls remain valid.
// NOTE(review): the exact meaning of op_rl and update_efr is not visible in this
// header - confirm against the implementation.
278 extern void rl_perform_update( agent *my_agent, double op_value, bool op_rl, Symbol *goal, bool update_efr = true );
279 
280 // clears eligibility traces in accordance with watkins
// Takes the agent and the goal symbol; nothing is returned.
281 extern void rl_watkins_clear( agent *my_agent, Symbol *goal );
282 
283 #endif