(defvar n 0) (defvar k 0) (defvar successor) (defvar R) (defvar Q) (defvar gamma .9) (defvar alpha .1) (defvar epsilon .1) (defvar randomness) (defvar max-num-tasks 2000) (defvar policy) (defvar V) (defun setup (n-arg k-arg) (setq n n-arg) (setq k k-arg) (setq successor (make-array (list n 2 k))) (setq R (make-array (list n (+ k 1) 2))) (setq Q (make-array (list n 2))) (setq policy (make-array n)) (setq V (make-array n)) (setq randomness (make-array max-num-tasks)) (standardize-random-state) (advance-random-state 0) (loop for task below max-num-tasks do (loop repeat 17 do (random 2)) (setf (aref randomness task) (make-random-state)))) (defun init (task-num) (setq *random-state* (make-random-state (aref randomness task-num))) (loop for s below n do (loop for a below 2 do (setf (aref Q s a) 0.0) (setf (aref R s k a) (random-normal)) (loop for sp in (random-k-of-n k n) for i below k do (setf (aref successor s a i) sp) do (setf (aref R s i a) (random-normal)))))) (defun random-k-of-n (k n) (loop for i = (random n) unless (member i result) collect i into result until (= k (length result)) finally (return result))) (defun next-state (s a) (with-prob gamma (aref successor s a (random k)) n)) (defun full-backup (s a) (+ (* (- 1 gamma) (aref R s k a)) (* gamma (/ k) (loop for i below k for sp = (aref successor s a i) sum (aref R s i a) sum (* gamma (loop for ap below 2 maximize (aref Q sp ap))))))) (defun runs-sweeps (n-arg k-arg num-runs num-sweeps sweeps-per-measurement) (unless (and (= n n-arg) (= k k-arg)) (setup n-arg k-arg)) (loop with backups-per-measurement = (truncate (* sweeps-per-measurement 2 n)) with backups-per-sweep = (* n 2) with num-backups = (* num-sweeps backups-per-sweep) with num-measurements = (truncate num-backups backups-per-measurement) with perf = (make-array num-measurements :initial-element 0.0) for run below num-runs do (init run) (format t "~A " run) (loop with backups = 0 repeat num-sweeps do (loop for s below n do (loop for a below 2 do (when (= 0 (mod backups backups-per-measurement)) (incf (aref perf (/ backups backups-per-measurement)) (measure-performance))) (setf (aref Q s a) (full-backup s a)) (incf backups)))) finally (record n k num-runs num-sweeps sweeps-per-measurement gamma 1 nil (loop for i below num-measurements collect (/ (aref perf i) num-runs))))) (defun runs-trajectories (n-arg k-arg num-runs num-sweeps sweeps-per-measurement) (unless (and (= n n-arg) (= k k-arg)) (setup n-arg k-arg)) (loop with backups-per-measurement = (truncate (* sweeps-per-measurement 2 n)) with backups-per-sweep = (* n 2) with num-backups = (* num-sweeps backups-per-sweep) with num-measurements = (truncate num-backups backups-per-measurement) with perf = (make-array num-measurements :initial-element 0.0) for run below num-runs do (init run) (format t "~A " run) (loop named run with backups = 0 do (loop for state = 0 then next-state for action = (with-prob epsilon (random 2) (if (>= (aref Q state 0) (aref Q state 1)) 0 1)) for next-state = (next-state state action) do (when (= 0 (mod backups backups-per-measurement)) (incf (aref perf (/ backups backups-per-measurement)) (measure-performance))) (setf (aref Q state action) (full-backup state action)) (incf backups) (when (= backups num-backups) (return-from run)) until (= next-state n))) finally (record n k num-runs num-sweeps sweeps-per-measurement gamma 1 epsilon (loop for i below num-measurements collect (/ (aref perf i) num-runs))))) (defun measure-performance () (loop for s below n do (setf (aref V s) 0.0) (setf (aref policy s) (if (>= (aref Q s 0) (aref Q s 1)) 0 1))) (loop for delta = (loop for s below n for old-V = (aref V s) do (setf (aref V s) (full-backup s (aref policy s))) sum (abs (- old-V (aref V s)))) until (< delta .001)) (aref V 0)) (defun both (n-arg k-arg runs-arg sweeps-arg measure-arg) (runs-sweeps n-arg k-arg runs-arg sweeps-arg measure-arg) (runs-trajectories n-arg k-arg runs-arg sweeps-arg measure-arg) (graph-data :n n-arg :k k-arg :runs runs-arg :sweeps sweeps-arg :sweeps-per-measurement measure-arg)) (defun big-exp () (both 10 1 200 10 1) (both 10 3 200 10 1) (both 100 1 200 10 .5) (both 100 3 200 10 .5) (both 100 10 200 10 .5) (both 1000 1 200 10 .2) (both 1000 3 200 10 .2) (both 1000 10 200 10 .2) (both 1000 20 200 10 .2) (both 10000 1 100 10 .1) (both 10000 3 200 10 .1) (both 10000 10 200 10 .1) (both 10000 20 200 10 .1) (both 10000 50 200 10 .1))