深度强化学习
第三篇:简介OpenAI的Gym
提纲
- 什么是Gym
- 常用的相关函数
内容
🙋‍♂️ 什么是Gym?
OpenAI 的 Gym 是训练强化学习算法的基本平台,可与 TensorFlow 无缝连接,官方仅支持 Python。仿真环境本质上是一组微分方程:简单的模型可以手动推导,复杂的模型则需要借助强大的物理引擎,如 ODE、Bullet、Havok、PhysX 等。Gym 在搭建机器人仿真环境时使用的是 MuJoCo,而 ROS 中常用的物理引擎是 Gazebo。
🙋‍♂️ 常用的相关函数
- reset() 初始化函数
def reset(self):
    """Re-initialize the environment and return its starting state.

    Returns:
        np.ndarray: the initial 4-component state vector, with every
        component drawn uniformly at random from [-0.05, 0.05).
    """
    # Draw the initial state from a uniform random distribution.
    initial = self.np_random.uniform(low=-0.05, high=0.05, size=(4,))
    self.state = initial

    # No steps have been taken past termination yet.
    self.steps_beyond_done = None

    # Hand the fresh state back to the caller as an array.
    return np.array(initial)
- render() 图像引擎
def render(self, mode='human'):
    """Image engine: draw the current cart-pole scene.

    A simulation environment needs two indispensable parts: a physics
    engine, which models how objects in the environment move, and an
    image engine, which displays those objects. This method plays the
    role of the image engine.

    Args:
        mode: 'human' opens/updates a window; 'rgb_array' returns the
            rendered frame as an RGB array.

    Returns:
        Whatever ``Viewer.render`` returns for the given mode, or
        ``None`` when the state has not been initialized yet.
    """
    screen_width = 600
    screen_height = 400

    # Scale factor mapping world x-coordinates onto screen pixels.
    world_width = self.x_threshold * 2
    scale = screen_width / world_width
    carty = 100  # TOP OF CART
    polewidth = 10.0
    polelen = scale * 1.0
    cartwidth = 50.0
    cartheight = 30.0

    if self.viewer is None:
        # Import the rendering module lazily and build the scene
        # geometry once, on the first call.
        from gym.envs.classic_control import rendering

        # A screen_width*screen_height (600*400) drawing window.
        self.viewer = rendering.Viewer(screen_width, screen_height)

        # The cart: rendering.FilledPolygon fills a rectangle.
        l, r, t, b = -cartwidth/2, cartwidth/2, cartheight/2, -cartheight/2
        axleoffset = cartheight/4.0
        cart = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)])
        # Transform gives the cart translation and rotation attributes.
        self.carttrans = rendering.Transform()
        cart.add_attr(self.carttrans)
        # Add the cart geometry to the scene.
        self.viewer.add_geom(cart)

        # The pole.
        l, r, t, b = -polewidth/2, polewidth/2, polelen-polewidth/2, -polewidth/2
        pole = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)])
        # Pole color.
        pole.set_color(.8, .6, .4)
        # The pole's own transform; it also inherits the cart's
        # transform so it translates together with the cart.
        self.poletrans = rendering.Transform(translation=(0, axleoffset))
        pole.add_attr(self.poletrans)
        pole.add_attr(self.carttrans)
        # Add the pole geometry.
        self.viewer.add_geom(pole)

        # The axle connecting the pole and the cart.
        self.axle = rendering.make_circle(polewidth/2)
        self.axle.add_attr(self.poletrans)
        self.axle.add_attr(self.carttrans)
        self.axle.set_color(.5, .5, .8)
        self.viewer.add_geom(self.axle)

        # The track the cart slides back and forth on: a straight line.
        self.track = rendering.Line((0, carty), (screen_width, carty))
        self.track.set_color(0, 0, 0)
        self.viewer.add_geom(self.track)

    if self.state is None:
        return None

    # Apply the current state as translation/rotation of the geometries.
    x = self.state
    cartx = x[0]*scale + screen_width/2.0  # MIDDLE OF CART
    self.carttrans.set_translation(cartx, carty)
    self.poletrans.set_rotation(-x[2])

    return self.viewer.render(return_rgb_array=mode == 'rgb_array')
- step() 物理引擎
def step(self, action):
    """Physics engine: advance the simulation by one time step.

    This method plays the role of the physics engine in the simulator.
    Its input is an action and its output is the next state, the
    immediate reward, a termination flag, and a debug dict — all the
    information exchanged between agent and environment. The next state
    and reward are computed from the cart-pole kinematic/dynamic model,
    and termination is decided from the thresholds.

    Args:
        action: discrete action (1 pushes the cart one way, anything
            else the other); must be contained in ``self.action_space``.

    Returns:
        tuple: (next state as np.ndarray, immediate reward,
        done flag, empty debug dict).
    """
    assert self.action_space.contains(action), "%r (%s) invalid"%(action, type(action))
    # Unpack the current state: cart position/velocity, pole angle/angular velocity.
    pos, vel, ang, ang_vel = self.state
    # Map the discrete action onto a horizontal force applied to the cart.
    force = self.force_mag if action==1 else -self.force_mag
    cos_ang = math.cos(ang)
    sin_ang = math.sin(ang)
    # Cart-pole dynamics: relate the accelerations to the applied force.
    temp = (force + self.polemass_length * ang_vel * ang_vel * sin_ang) / self.total_mass
    # Angular acceleration of the pole.
    ang_acc = (self.gravity * sin_ang - cos_ang * temp) / (self.length * (4.0/3.0 - self.masspole * cos_ang * cos_ang / self.total_mass))
    # Linear acceleration of the cart.
    lin_acc = temp - self.polemass_length * ang_acc * cos_ang / self.total_mass
    # Euler integration with step size self.tau (0.02) to get the next state.
    pos = pos + self.tau * vel
    vel = vel + self.tau * lin_acc
    ang = ang + self.tau * ang_vel
    ang_vel = ang_vel + self.tau * ang_acc
    self.state = (pos, vel, ang, ang_vel)

    # Episode ends once the cart position or pole angle leaves its threshold.
    done = bool(abs(pos) > self.x_threshold
                or abs(ang) > self.theta_threshold_radians)

    if not done:
        reward = 1.0
    elif self.steps_beyond_done is None:
        # Pole just fell! Still grant the final reward for this step.
        self.steps_beyond_done = 0
        reward = 1.0
    else:
        # Stepping past termination is undefined behavior; warn once.
        if self.steps_beyond_done == 0:
            logger.warn("You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.")
        self.steps_beyond_done += 1
        reward = 0.0

    return np.array(self.state), reward, done, {}
- 一个简单的Demo
# Minimal demo: run a random agent in the CartPole-v0 environment.
import gym
import time

env = gym.make('CartPole-v0')                    # create the environment
observation = env.reset()                        # initialize; `observation` is the env state
count = 0
for t in range(100):
    action = env.action_space.sample()           # sample a random action
    # Interact with the environment to get the next-step transition.
    observation, reward, done, info = env.step(action)
    if done:
        break
    env.render()                                 # draw the current scene
    count += 1
    time.sleep(0.2)                              # pause 0.2 s between frames
print(count)                                     # steps survived in this trial
env.close()                                      # BUGFIX: release the viewer window/resources