Commit b2366d8a authored by Nicola Botta's avatar Nicola Botta
Browse files

Adding Bellman.

parent 4cf1a317
> module SequentialDecisionProblems.CoreTheory
> import Sigma.Sigma
> -- infixr 7 <+>
> infixr 7 <++>
> public export
> record SDP where
> constructor MkSDP
> M : Type -> Type
> fM : Functor M
> X : (t : Nat) -> Type
> Y : (t : Nat) -> (x : X t) -> Type
> next : (t : Nat) -> (x : X t) -> (y : Y t x) -> M (X (S t))
> Val : Type
> reward : (t : Nat) -> (x : X t) -> (y : Y t x) -> (x' : X (S t)) -> Val
> (<+>) : Val -> Val -> Val
> zero : Val
> (<=) : Val -> Val -> Type
> meas : M Val -> Val
> M : Type -> Type
> fM : Functor M
> X : (t : Nat) -> Type
> Y : (t : Nat) -> X t -> Type
> next : (t : Nat) -> (x : X t) -> Y t x -> M (X (S t))
> Val : Type
> reward : (t : Nat) -> (x : X t) -> Y t x -> X (S t) -> Val
> (<+>) : Val -> Val -> Val
> zero : Val
> (<=) : Val -> Val -> Type
> lteRefl : {v : Val} -> v <= v
> lteTrans : {v1, v2, v3 : Val} -> v1 <= v2 -> v2 <= v3 -> v1 <= v3
> plusMon : {v1, v2, v3, v4 : Val} -> v1 <= v2 -> v3 <= v4 -> (v1 <+> v3) <= (v2 <+> v4)
> meas : M Val -> Val
> -- measMon : {A : Type} -> (f, g : A -> Val) -> ((a : A) -> f a <= g a) ->
> -- (ma : M A) -> meas (map f ma) <= meas (map g ma)
> (<++>) : {sdp : SDP} -> {A : Type} -> (f, g : A -> Val sdp) -> A -> Val sdp
> (<++>) {sdp} f g = let (<+>) = (<+>) sdp in
> \ a => f a <+> g a
> Policy : SDP -> (t : Nat) -> Type
> Policy sdp t = (x : X sdp t) -> Y sdp t x
> data PolicySeq : SDP -> (t : Nat) -> (n : Nat) -> Type where
> Nil : {sdp : SDP} -> {t : Nat} ->
> PolicySeq sdp t Z
> (::) : {sdp : SDP} -> {t, n : Nat} ->
> Policy sdp t -> PolicySeq sdp (S t) n -> PolicySeq sdp t (S n)
> val : {sdp : SDP} -> {t, n : Nat} -> PolicySeq sdp t n -> X sdp t -> Val sdp
> val {sdp} {t} Nil x = zero sdp
> val {sdp} {t} (p :: ps) x = let y = p x in
> let mx' = next sdp t x y in
> let fM' = fM sdp in
> meas (map (\ x' => ((<+>) sdp) (reward sdp t x y x') (val ps x')) mx')
> val : {sdp : SDP} -> {t, n : Nat} -> Functor (M sdp) => PolicySeq sdp t n -> X sdp t -> Val sdp
> val {sdp} {t} Nil x = zero sdp
> val {sdp} {t} (p :: ps) x = let map = map {f = M sdp} in
> let next = next sdp in
> let reward = reward sdp in
> let meas = meas sdp in
> let y = p x in
> let mx' = next t x y in
> meas (map (reward t x y <++> val ps) mx')
> OptPolicySeq : {sdp : SDP} -> {t, n : Nat} -> Functor (M sdp) =>
> PolicySeq sdp t n -> Type
> OptPolicySeq {sdp} {t} {n} ps = let (<=) = (<=) sdp in
> (ps' : PolicySeq sdp t n) -> (x : X sdp t) ->
> val ps' x <= val ps x
> OptExt : {sdp : SDP} -> {t, n : Nat} -> Functor (M sdp) =>
> PolicySeq sdp (S t) n -> Policy sdp t -> Type
> OptExt {sdp} {t} ps p = let (<=) = (<=) sdp in
> (p' : Policy sdp t) -> (x : X sdp t) ->
> val (p' :: ps) x <= val (p :: ps) x
> Bellman : {sdp : SDP} -> {t, n : Nat} -> Functor (M sdp) =>
> (ps : PolicySeq sdp (S t) n) -> OptPolicySeq ps ->
> (p : Policy sdp t) -> OptExt ps p ->
> OptPolicySeq (p :: ps)
> Bellman {sdp} {t} ps ops p oep (p' :: ps') x =
> -- let X = X sdp in
> let next = next sdp in
> let reward = reward sdp in
> let (<+>) = (<+>) sdp in
> let (<=) = (<=) sdp in
> -- let measMon = measMon sdp in
> let plusMon = plusMon sdp in
> let lteRefl = lteRefl sdp in
> let lteTrans = lteTrans sdp in
> let y' = p' x in
> let mx' = next t x y' in
> let f' = \ x' => reward t x y' x' <+> val ps' x' in
> let f = \ x' => reward t x y' x' <+> val ps x' in
> let s0 : ((x' : X sdp (S t)) -> (f' x') <= (f x'))
> = ?kika in -- \ x' => plusMon {v1 = reward t x y' x'}
> -- {v2 = reward t x y' x'}
> -- {v3 = val ps' x'}
> -- {v4 = val ps x'} (lteRefl {v = reward t x y' x'}) (ops ps' x') in
> let s1 : (val (p' :: ps') x <= val (p' :: ps) x)
> = ?lala in -- measMon f' f s0 mx' in
> let s2 : (val (p' :: ps) x <= val (p :: ps) x)
> = oep p' x in
> lteTrans s1 s2
> {-
......
> module FirstClassConstraints
> record Foo where
> constructor MkFoo
> X : Type
> (<+>) : X -> X -> X
> -- bar : {foo : Foo} -> X foo -> X foo
> -- bar {foo} x = (<+>) foo x x
> bar : {foo : Foo} -> X foo -> X foo
> bar {foo} x = let (<+>) = (<+>) foo in x <+> x
> {-
> ---}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment