// 3legs4 pomdp spec
// author: Pascal Poupart
// for descriptions of this model please see the relevant publications
//
// VDCBPI: an Approximate Scalable Algorithm for Large Scale POMDPs
// Pascal Poupart and Craig Boutilier
// In Advances in Neural Information Processing Systems 17 (NIPS), pages 1081-1088, Vancouver, BC, 2004
//
// Exploiting Structure to Efficiently Solve Large Scale Partially Observable Markov Decision Processes
// Pascal Poupart
// Ph.D. thesis, Department of Computer Science, University of Toronto, Toronto, 2005
//
// This code is to be used for research purposes only, is distributed "as is" with no guarantees.
// Please inform the authors of any uses of this problem specification.

// ---------------------------------------------------------------------------
// Overview of what this file declares:
//   * four binary state variables m1..m4 (values: down / up)
//   * one binary observation variable, status (values: oDown / oUp)
//   * nine actions: reboot_m1..reboot_m4, ping_m1..ping_m4, doNothing
//
// Transition patterns that repeat in every action below:
//   * "rebooted" variable mK : mK' is up with 0.95 / down with 0.05,
//     independent of any previous state.
//   * m1 when not rebooted   : depends only on m1
//       m1 down -> stays down 0.99, comes up 0.01
//       m1 up   -> goes down 0.1,  stays up 0.9
//   * m2/m3/m4 when not rebooted : depend on their own previous value and on m1
//       mK down          -> stays down 0.99, comes up 0.01
//       mK up, m1 down   -> goes down 0.333, stays up 0.667
//       mK up, m1 up     -> goes down 0.1,   stays up 0.9
//
// Observation patterns:
//   * reboot_* and doNothing : status' is oDown with probability 1.0
//     (the observation does not depend on the state).
//   * ping_mK : status' matches mK' with probability 0.95 and is wrong
//     with probability 0.05.
//
// Cost pattern: a sum ([+ ...]) of one term per variable (m1 up: -2.0,
// m2..m4 up: -1.0 each, down: -0.0) plus an action constant
// (2.5 for reboot_*, 0.1 for ping_*, none for doNothing).
// ---------------------------------------------------------------------------

(variables
    (m1 down up)
    (m2 down up)
    (m3 down up)
    (m4 down up)
)

(observations
    (status oDown oUp)
)

// Initial belief: all probability mass on m1 = m2 = m3 = m4 = up.
init (m4 (down (0.0))
         (up (m3 (down (0.0))
                 (up (m2 (down (0.0))
                         (up (m1 (down (0.0))
                                 (up (1.0)))))))))

// NOTE(review): presumably tells the solver not to normalise the
// distributions given below -- confirm against the parser in use.
unnormalised

// ---------------------------------------------------------------------------
// reboot actions (action constant in cost: 2.5)
// ---------------------------------------------------------------------------

action reboot_m1
    m1 (m1' (down (0.05)) (up (0.95)))
    m2 (m2' (down (m2 (down (0.99)) (up (m1 (down (0.333)) (up (0.1))))))
            (up (m2 (down (0.01)) (up (m1 (down (0.667)) (up (0.9)))))))
    m3 (m3' (down (m3 (down (0.99)) (up (m1 (down (0.333)) (up (0.1))))))
            (up (m3 (down (0.01)) (up (m1 (down (0.667)) (up (0.9)))))))
    m4 (m4' (down (m4 (down (0.99)) (up (m1 (down (0.333)) (up (0.1))))))
            (up (m4 (down (0.01)) (up (m1 (down (0.667)) (up (0.9)))))))
    observe
        status (status' (oDown (1.0)) (oUp (0.0)))
    endobserve
    cost [+ (m1 (down (-0.0)) (up (-2.0)))
            (m2 (down (-0.0)) (up (-1.0)))
            (m3 (down (-0.0)) (up (-1.0)))
            (m4 (down (-0.0)) (up (-1.0)))
            (2.5)]
endaction

action reboot_m2
    m1 (m1' (down (m1 (down (0.99)) (up (0.1))))
            (up (m1 (down (0.01)) (up (0.9)))))
    m2 (m2' (down (0.05)) (up (0.95)))
    m3 (m3' (down (m3 (down (0.99)) (up (m1 (down (0.333)) (up (0.1))))))
            (up (m3 (down (0.01)) (up (m1 (down (0.667)) (up (0.9)))))))
    m4 (m4' (down (m4 (down (0.99)) (up (m1 (down (0.333)) (up (0.1))))))
            (up (m4 (down (0.01)) (up (m1 (down (0.667)) (up (0.9)))))))
    observe
        status (status' (oDown (1.0)) (oUp (0.0)))
    endobserve
    cost [+ (m1 (down (-0.0)) (up (-2.0)))
            (m2 (down (-0.0)) (up (-1.0)))
            (m3 (down (-0.0)) (up (-1.0)))
            (m4 (down (-0.0)) (up (-1.0)))
            (2.5)]
endaction

action reboot_m3
    m1 (m1' (down (m1 (down (0.99)) (up (0.1))))
            (up (m1 (down (0.01)) (up (0.9)))))
    m2 (m2' (down (m2 (down (0.99)) (up (m1 (down (0.333)) (up (0.1))))))
            (up (m2 (down (0.01)) (up (m1 (down (0.667)) (up (0.9)))))))
    m3 (m3' (down (0.05)) (up (0.95)))
    m4 (m4' (down (m4 (down (0.99)) (up (m1 (down (0.333)) (up (0.1))))))
            (up (m4 (down (0.01)) (up (m1 (down (0.667)) (up (0.9)))))))
    observe
        status (status' (oDown (1.0)) (oUp (0.0)))
    endobserve
    cost [+ (m1 (down (-0.0)) (up (-2.0)))
            (m2 (down (-0.0)) (up (-1.0)))
            (m3 (down (-0.0)) (up (-1.0)))
            (m4 (down (-0.0)) (up (-1.0)))
            (2.5)]
endaction

action reboot_m4
    m1 (m1' (down (m1 (down (0.99)) (up (0.1))))
            (up (m1 (down (0.01)) (up (0.9)))))
    m2 (m2' (down (m2 (down (0.99)) (up (m1 (down (0.333)) (up (0.1))))))
            (up (m2 (down (0.01)) (up (m1 (down (0.667)) (up (0.9)))))))
    m3 (m3' (down (m3 (down (0.99)) (up (m1 (down (0.333)) (up (0.1))))))
            (up (m3 (down (0.01)) (up (m1 (down (0.667)) (up (0.9)))))))
    m4 (m4' (down (0.05)) (up (0.95)))
    observe
        status (status' (oDown (1.0)) (oUp (0.0)))
    endobserve
    cost [+ (m1 (down (-0.0)) (up (-2.0)))
            (m2 (down (-0.0)) (up (-1.0)))
            (m3 (down (-0.0)) (up (-1.0)))
            (m4 (down (-0.0)) (up (-1.0)))
            (2.5)]
endaction

// ---------------------------------------------------------------------------
// ping actions (action constant in cost: 0.1)
// No variable is rebooted; the observation is conditioned on the pinged
// variable's post-action value (the primed variable).
// ---------------------------------------------------------------------------

action ping_m1
    m1 (m1' (down (m1 (down (0.99)) (up (0.1))))
            (up (m1 (down (0.01)) (up (0.9)))))
    m2 (m2' (down (m2 (down (0.99)) (up (m1 (down (0.333)) (up (0.1))))))
            (up (m2 (down (0.01)) (up (m1 (down (0.667)) (up (0.9)))))))
    m3 (m3' (down (m3 (down (0.99)) (up (m1 (down (0.333)) (up (0.1))))))
            (up (m3 (down (0.01)) (up (m1 (down (0.667)) (up (0.9)))))))
    m4 (m4' (down (m4 (down (0.99)) (up (m1 (down (0.333)) (up (0.1))))))
            (up (m4 (down (0.01)) (up (m1 (down (0.667)) (up (0.9)))))))
    observe
        status (status' (oDown (m1' (down (0.95)) (up (0.05))))
                        (oUp (m1' (down (0.05)) (up (0.95)))))
    endobserve
    cost [+ (m1 (down (-0.0)) (up (-2.0)))
            (m2 (down (-0.0)) (up (-1.0)))
            (m3 (down (-0.0)) (up (-1.0)))
            (m4 (down (-0.0)) (up (-1.0)))
            (0.1)]
endaction

action ping_m2
    m1 (m1' (down (m1 (down (0.99)) (up (0.1))))
            (up (m1 (down (0.01)) (up (0.9)))))
    m2 (m2' (down (m2 (down (0.99)) (up (m1 (down (0.333)) (up (0.1))))))
            (up (m2 (down (0.01)) (up (m1 (down (0.667)) (up (0.9)))))))
    m3 (m3' (down (m3 (down (0.99)) (up (m1 (down (0.333)) (up (0.1))))))
            (up (m3 (down (0.01)) (up (m1 (down (0.667)) (up (0.9)))))))
    m4 (m4' (down (m4 (down (0.99)) (up (m1 (down (0.333)) (up (0.1))))))
            (up (m4 (down (0.01)) (up (m1 (down (0.667)) (up (0.9)))))))
    observe
        status (status' (oDown (m2' (down (0.95)) (up (0.05))))
                        (oUp (m2' (down (0.05)) (up (0.95)))))
    endobserve
    cost [+ (m1 (down (-0.0)) (up (-2.0)))
            (m2 (down (-0.0)) (up (-1.0)))
            (m3 (down (-0.0)) (up (-1.0)))
            (m4 (down (-0.0)) (up (-1.0)))
            (0.1)]
endaction

action ping_m3
    m1 (m1' (down (m1 (down (0.99)) (up (0.1))))
            (up (m1 (down (0.01)) (up (0.9)))))
    m2 (m2' (down (m2 (down (0.99)) (up (m1 (down (0.333)) (up (0.1))))))
            (up (m2 (down (0.01)) (up (m1 (down (0.667)) (up (0.9)))))))
    m3 (m3' (down (m3 (down (0.99)) (up (m1 (down (0.333)) (up (0.1))))))
            (up (m3 (down (0.01)) (up (m1 (down (0.667)) (up (0.9)))))))
    m4 (m4' (down (m4 (down (0.99)) (up (m1 (down (0.333)) (up (0.1))))))
            (up (m4 (down (0.01)) (up (m1 (down (0.667)) (up (0.9)))))))
    observe
        status (status' (oDown (m3' (down (0.95)) (up (0.05))))
                        (oUp (m3' (down (0.05)) (up (0.95)))))
    endobserve
    cost [+ (m1 (down (-0.0)) (up (-2.0)))
            (m2 (down (-0.0)) (up (-1.0)))
            (m3 (down (-0.0)) (up (-1.0)))
            (m4 (down (-0.0)) (up (-1.0)))
            (0.1)]
endaction

action ping_m4
    m1 (m1' (down (m1 (down (0.99)) (up (0.1))))
            (up (m1 (down (0.01)) (up (0.9)))))
    m2 (m2' (down (m2 (down (0.99)) (up (m1 (down (0.333)) (up (0.1))))))
            (up (m2 (down (0.01)) (up (m1 (down (0.667)) (up (0.9)))))))
    m3 (m3' (down (m3 (down (0.99)) (up (m1 (down (0.333)) (up (0.1))))))
            (up (m3 (down (0.01)) (up (m1 (down (0.667)) (up (0.9)))))))
    m4 (m4' (down (m4 (down (0.99)) (up (m1 (down (0.333)) (up (0.1))))))
            (up (m4 (down (0.01)) (up (m1 (down (0.667)) (up (0.9)))))))
    observe
        status (status' (oDown (m4' (down (0.95)) (up (0.05))))
                        (oUp (m4' (down (0.05)) (up (0.95)))))
    endobserve
    cost [+ (m1 (down (-0.0)) (up (-2.0)))
            (m2 (down (-0.0)) (up (-1.0)))
            (m3 (down (-0.0)) (up (-1.0)))
            (m4 (down (-0.0)) (up (-1.0)))
            (0.1)]
endaction

// ---------------------------------------------------------------------------
// doNothing: same transitions as the ping actions, state-independent
// observation, and no action constant in the cost sum.
// ---------------------------------------------------------------------------

action doNothing
    m1 (m1' (down (m1 (down (0.99)) (up (0.1))))
            (up (m1 (down (0.01)) (up (0.9)))))
    m2 (m2' (down (m2 (down (0.99)) (up (m1 (down (0.333)) (up (0.1))))))
            (up (m2 (down (0.01)) (up (m1 (down (0.667)) (up (0.9)))))))
    m3 (m3' (down (m3 (down (0.99)) (up (m1 (down (0.333)) (up (0.1))))))
            (up (m3 (down (0.01)) (up (m1 (down (0.667)) (up (0.9)))))))
    m4 (m4' (down (m4 (down (0.99)) (up (m1 (down (0.333)) (up (0.1))))))
            (up (m4 (down (0.01)) (up (m1 (down (0.667)) (up (0.9)))))))
    observe
        status (status' (oDown (1.0)) (oUp (0.0)))
    endobserve
    cost [+ (m1 (down (-0.0)) (up (-2.0)))
            (m2 (down (-0.0)) (up (-1.0)))
            (m3 (down (-0.0)) (up (-1.0)))
            (m4 (down (-0.0)) (up (-1.0)))]
endaction

discount 0.950000
tolerance 0.001500