; LISP code for the Temporal-Difference (TD) model of classical conditioning.
; As specified in:
;
; Sutton, R.S., Barto, A.G. (1990) "Time-Derivative Models of Pavlovian
; Reinforcement," in Learning and Computational Neuroscience: Foundations
; of Adaptive Networks, M. Gabriel and J. Moore, Eds., pp. 497--537.
; MIT Press.  ftp://ftp.cs.umass.edu/pub/anw/pub/sutton/sutton-barto-90.ps
;
; This code was written by Rich Sutton. June 3, 1996
;
; New experiments can easily be created by adding additional stimulus vectors, etc.
;
; NOTE: the trace list and the step counter are named TRACES and TIME-STEP
; (not TRACE and TIME).  TRACE and TIME are external symbols of the
; COMMON-LISP package; proclaiming them special with DEFVAR has undefined
; consequences and is rejected by implementations with package locks.

(defvar n)                              ; number of stimuli, length of lists
(defvar V)                              ; list of associative strengths
(defvar traces)                         ; list of stimulus traces
(defvar old-Vbar)                       ; current V dotted with last X
(defvar time-step)                      ; steps since beginning of experiment

(defvar alpha 0.1)                      ; Basic parameters of model
(defvar beta 1.0)                       ; See Sutton & Barto, 1990
(defvar delta 0.2)
(defvar gamma 0.95)

(defun setup (n-arg)
  "Resets all model state for an experiment with N-ARG inputs, including any
background stimulus: the step counter and previous prediction go to zero, and
the associative strengths and stimulus traces become fresh lists of N-ARG
zeros."
  (setq time-step 0)
  (setq n n-arg)
  ;; MAKE-LIST returns fresh lists, so STEPS may update them destructively.
  (setq V (make-list n :initial-element 0.0))
  (setq old-Vbar 0)
  (setq traces (make-list n :initial-element 0.0)))

(defun Vbar (V X)
  "Computes prediction Vbar for inputs X given associative strengths V:
the sum of the element-wise products of V and X, clipped below at 0.
The sum stops at the end of the shorter list."
  ;; V is also a global special variable, so this parameter rebinds it
  ;; dynamically for the duration of the call; the global value is untouched.
  (max 0 (loop for V-i in V
               for X-i in X
               sum (* V-i X-i))))

(defun steps (num-steps X lambda)
  "Runs TD model for NUM-STEPS time steps with CSs=X (a list) and US=LAMBDA.
Destructively updates the global lists V and TRACES, increments TIME-STEP
once per step, and leaves OLD-VBAR holding the prediction computed from the
updated V.  Does nothing when NUM-STEPS is 0.  Returns NIL."
  (loop repeat num-steps
        do (let* ((new-Vbar (Vbar V X))
                  ;; TD error scaled by the learning rates:
                  ;; alpha * beta * (lambda + gamma * Vbar_now - Vbar_prev)
                  (alpha-beta-error (* alpha beta (+ lambda
                                                     (* gamma new-Vbar)
                                                     (- old-Vbar)))))
             (incf time-step)
             ;; Walk the cons cells of V and TRACES in lock step instead of
             ;; re-indexing with NTH.  Each strength is updated from the
             ;; trace value as it was BEFORE this step; only then does the
             ;; trace move a fraction DELTA toward the current input.
             (loop for i below n
                   for X-i in X
                   for V-cell on V
                   for trace-cell on traces
                   for trace-i = (car trace-cell)
                   do (incf (car V-cell) (* alpha-beta-error trace-i))
                      (incf (car trace-cell) (* delta (- X-i trace-i))))
             ;; Debugging aid (place inside the inner loop body):
             ;; (format t "~%~D i:~D trace:~F V:~F abe:~F"
             ;;         time-step i (car trace-cell) (car V-cell) alpha-beta-error)
             ;; Re-evaluate the prediction with the updated strengths so the
             ;; next step's error is measured against it.
             (setq old-Vbar (Vbar V X)))))

(defun trace-conditioning-with-background (ISI)
  "Does trace conditioning experiment for ISIs >= CS duration (in number of
steps).  CS duration is 4.  US duration is 1.
Stimulus 0 is the always-on background and stimulus 1 is the CS.  Runs 20
trials, printing (trial V) after each one; the results are left in the
global V.  Signals an error when ISI < 4."
  (when (< ISI 4) (error "This program won't work for ISIs < CS duration"))
  (setup 2)
  (steps 100 '(1 0) 0)                  ; inter-trial-interval
  (loop for trial below 20
        do (steps 4 '(1 1) 0)           ; present CS with background
           (steps (- ISI 4) '(1 0) 0)   ; trace interval
           (steps 1 '(1 0) 1)           ; US/reward
           (steps 100 '(1 0) 0)         ; inter-trial-interval
           (print (list trial V))))

(defun trace-ISI-function ()
  "Generates ISI function for trace conditioning: runs the trace conditioning
experiment once per ISI value and collects the final associative strength of
the CS (the second element of V) for each."
  (loop for ISI in '(4 6 8 10 12 14 16 18 20 30 50)
        collect (progn (trace-conditioning-with-background ISI)
                       (second V))))

(defun backward-conditioning (ISI)
  "Does backwards conditioning experiment for ISIs >= US duration (in number
of steps).  CS duration is 4.  US duration is 1.
Each trial presents the US first, then the CS after the interval.  Stimulus 0
is the always-on background and stimulus 1 is the CS.  Runs 20 trials,
printing (trial V) after each one; the results are left in the global V.
Signals an error when ISI < 1."
  (when (< ISI 1) (error "This program won't work for ISIs < US duration"))
  (setup 2)
  (steps 100 '(1 0) 0)                  ; inter-trial-interval
  (loop for trial below 20
        do (steps 1 '(1 0) 1)           ; US/reward
           (steps (- ISI 1) '(1 0) 0)   ; trace interval
           (steps 4 '(1 1) 0)           ; present CS with background
           (steps 100 '(1 0) 0)         ; inter-trial-interval
           (print (list trial V))))