（1）随机变量

（2）概率分布

（3）条件概率

（4）期望和方差

（5）方差

（1）贪婪策略

（2）-greedy 策略

（3）高斯策略

（4）玻尔兹曼分布

## ​​grid_mdp.py​​ 文件代码

import loggingimport numpyimport randomfrom gym import spacesimport gymlogger = logging.getLogger(__name__)class GridEnv(gym.Env):    metadata = {        'render.modes': ['human', 'rgb_array'],        'video.frames_per_second': 2    }    def __init__(self):        self.states = [1,2,3,4,5,6,7,8] #状态空间        self.x=[140,220,300,380,460,140,300,460]        self.y=[250,250,250,250,250,150,150,150]        self.terminate_states = dict()  #终止状态为字典格式        self.terminate_states[6] = 1        self.terminate_states[7] = 1        self.terminate_states[8] = 1        self.actions = ['n','e','s','w']        self.rewards = dict();        #回报的数据结构为字典        self.rewards['1_s'] = -1.0        self.rewards['3_s'] = 1.0        self.rewards['5_s'] = -1.0        self.t = dict();             #状态转移的数据格式为字典        self.t['1_s'] = 6        self.t['1_e'] = 2        self.t['2_w'] = 1        self.t['2_e'] = 3        self.t['3_s'] = 7        self.t['3_w'] = 2        self.t['3_e'] = 4        self.t['4_w'] = 3        self.t['4_e'] = 5        self.t['5_s'] = 8        self.t['5_w'] = 4        self.gamma = 0.8         #折扣因子        self.viewer = None        self.state = None    def getTerminal(self):        return self.terminate_states    def getGamma(self):        return self.gamma    def getStates(self):        return self.states    def getAction(self):        return self.actions    def getTerminate_states(self):        return self.terminate_states    def setAction(self,s):        self.state=s    def _step(self, action):        #系统当前状态        state = self.state        if state in self.terminate_states:            return state, 0, True, {}        key = "%d_%s"%(state, action)   #将状态和动作组成字典的键值        #状态转移        if key in self.t:            next_state = self.t[key]        else:            next_state = state        self.state = next_state        is_terminal = False        if next_state in self.terminate_states:            is_terminal = True        if key not in self.rewards:            r = 0.0        else:            r = self.rewards[key]        return next_state, r,is_terminal,{}    def _reset(self):        self.state = self.states[int(random.random() * len(self.states))]        return self.state    def render(self, mode='human', close=False):        if close:            if self.viewer is not None:                self.viewer.close()                self.viewer = None            return        screen_width = 600        screen_height = 400        if self.viewer is None:            from gym.envs.classic_control import rendering            self.viewer = rendering.Viewer(screen_width, screen_height)            #创建网格世界            self.line1 = rendering.Line((100,300),(500,300))            self.line2 = rendering.Line((100, 200), (500, 200))            self.line3 = rendering.Line((100, 300), (100, 100))            self.line4 = rendering.Line((180, 300), (180, 100))            self.line5 = rendering.Line((260, 300), (260, 100))            self.line6 = rendering.Line((340, 300), (340, 100))            self.line7 = rendering.Line((420, 300), (420, 100))            self.line8 = rendering.Line((500, 300), (500, 100))            self.line9 = rendering.Line((100, 100), (180, 100))            self.line10 = rendering.Line((260, 100), (340, 100))            self.line11 = rendering.Line((420, 100), (500, 100))            #创建第一个骷髅            self.kulo1 = rendering.make_circle(40)            self.circletrans = rendering.Transform(translation=(140,150))            self.kulo1.add_attr(self.circletrans)            self.kulo1.set_color(0,0,0)            #创建第二个骷髅            self.kulo2 = rendering.make_circle(40)            self.circletrans = rendering.Transform(translation=(460, 150))            self.kulo2.add_attr(self.circletrans)            self.kulo2.set_color(0, 0, 0)            #创建金条            self.gold = rendering.make_circle(40)            self.circletrans = rendering.Transform(translation=(300, 150))            self.gold.add_attr(self.circletrans)            self.gold.set_color(1, 0.9, 0)            #创建机器人            self.robot= rendering.make_circle(30)            self.robotrans = rendering.Transform()            self.robot.add_attr(self.robotrans)            self.robot.set_color(0.8, 0.6, 0.4)            self.line1.set_color(0, 0, 0)            self.line2.set_color(0, 0, 0)            self.line3.set_color(0, 0, 0)            self.line4.set_color(0, 0, 0)            self.line5.set_color(0, 0, 0)            self.line6.set_color(0, 0, 0)            self.line7.set_color(0, 0, 0)            self.line8.set_color(0, 0, 0)            self.line9.set_color(0, 0, 0)            self.line10.set_color(0, 0, 0)            self.line11.set_color(0, 0, 0)            self.viewer.add_geom(self.line1)            self.viewer.add_geom(self.line2)            self.viewer.add_geom(self.line3)            self.viewer.add_geom(self.line4)            self.viewer.add_geom(self.line5)            self.viewer.add_geom(self.line6)            self.viewer.add_geom(self.line7)            self.viewer.add_geom(self.line8)            self.viewer.add_geom(self.line9)            self.viewer.add_geom(self.line10)            self.viewer.add_geom(self.line11)            self.viewer.add_geom(self.kulo1)            self.viewer.add_geom(self.kulo2)            self.viewer.add_geom(self.gold)            self.viewer.add_geom(self.robot)        if self.state is None: return None        #self.robotrans.set_translation(self.x[self.state-1],self.y[self.state-1])        self.robotrans.set_translation(self.x[self.state-1], self.y[self.state- 1])        return self.viewer.render(return_rgb_array=mode == 'rgb_array')