Using Custom Environments – pt.3
https://pythonprogramming.net/custom-environment-reinforcement-learning-stable-baselines-3-tutorial/
(OpenAI의 gym Environment 대신에) 자신의 custom Environment에서 RL을 수행.
어떤 Environment를 gym Environment에 맞게 변환해 주면 된다. (말이 쉽지 구현이 까다롭다….)
RL에서 가장 까다로운 2가지
– Environment의 observation을 가져오는 일 (data engineering)
– Agent에 적절한 rewards부여하기
Game = 그 자체가 가장 좋은 Environment = 뱀게임 (source)
# Snake game in OpenCV, used later as the basis for a custom gym environment.
# https://theailearner.com/2019/03/10/creating-a-snake-game-using-opencv-python/
# https://github.com/TheAILearner/Snake-Game-using-OpenCV-Python/blob/master/snake_game_using_opencv.ipynb
import numpy as np
import cv2  # imshow(), waitKey(), rectangle(), putText()
import random
import time

# Board is 500x500 px; one grid cell is 10x10 px.
BOARD_SIZE = 500
CELL = 10


### Game Rules:
# 1. The snake dies when its head leaves the board.
def collision_with_boundaries(snake_head):
    """Return 1 if the head [x, y] lies outside the board, else 0."""
    if (snake_head[0] >= BOARD_SIZE or snake_head[0] < 0
            or snake_head[1] >= BOARD_SIZE or snake_head[1] < 0):
        return 1
    return 0


# 2. The snake dies when its head runs into its own body.
def collision_with_self(snake_position):
    """Return 1 if the head (first segment) overlaps any body segment, else 0."""
    snake_head = snake_position[0]
    return 1 if snake_head in snake_position[1:] else 0


# 3-1. Score increases and the apple is moved to a new position.
def collision_with_apple(apple_position, score):
    """Return (new apple position, incremented score).

    NOTE(review): the incoming apple_position is ignored and replaced by a
    fresh random cell; the new apple may respawn on top of the snake's body.
    """
    score += 1
    apple_position = [random.randrange(1, 50) * CELL, random.randrange(1, 50) * CELL]
    return apple_position, score


def main():
    """Run the interactive snake game until death or 'q' is pressed."""
    ### Game Window :: Display game objects
    img = np.zeros((BOARD_SIZE, BOARD_SIZE, 3), dtype='uint8')

    ### Apple and Snake - initial positions (snake stored head-first)
    apple_position = [random.randrange(1, 50) * CELL, random.randrange(1, 50) * CELL]
    snake_position = [[250, 250], [240, 250], [230, 250]]
    score = 0
    # Direction codes: 0-Left, 1-Right, 3-Up, 2-Down
    prev_button_direction = 1
    button_direction = 1
    snake_head = [250, 250]

    while True:
        cv2.imshow('a', img)
        cv2.waitKey(1)
        img = np.zeros((BOARD_SIZE, BOARD_SIZE, 3), dtype='uint8')

        # Draw the apple (red) and every snake segment (green).
        cv2.rectangle(img, (apple_position[0], apple_position[1]),
                      (apple_position[0] + CELL, apple_position[1] + CELL),
                      (0, 0, 255), 3)
        for position in snake_position:
            cv2.rectangle(img, (position[0], position[1]),
                          (position[0] + CELL, position[1] + CELL),
                          (0, 255, 0), 3)

        # Fixed time step: poll the keyboard once, then wait out the rest of
        # the 60 ms so the snake advances at a constant speed.
        t_end = time.time() + 0.06
        k = -1
        while time.time() < t_end:
            if k == -1:
                k = cv2.waitKey(6)
            # else: spin until t_end (keeps the step duration constant)

        # Key map: a-Left, d-Right, w-Up, s-Down, q-Quit.
        # A 180-degree turn (straight back into the body) is disallowed.
        if k == ord('a') and prev_button_direction != 1:
            button_direction = 0
        elif k == ord('d') and prev_button_direction != 0:
            button_direction = 1
        elif k == ord('w') and prev_button_direction != 2:
            button_direction = 3
        elif k == ord('s') and prev_button_direction != 3:
            button_direction = 2
        elif k == ord('q'):
            break
        # any other key (or no key) keeps the current direction
        prev_button_direction = button_direction

        # Move the head one cell in the chosen direction.
        if button_direction == 1:
            snake_head[0] += CELL
        elif button_direction == 0:
            snake_head[0] -= CELL
        elif button_direction == 2:
            snake_head[1] += CELL
        elif button_direction == 3:
            snake_head[1] -= CELL

        # 3-2. Grow on eating an apple; otherwise shift the body forward
        # (new head is always prepended; tail is popped only when not eating).
        snake_position.insert(0, list(snake_head))
        if snake_head == apple_position:
            apple_position, score = collision_with_apple(apple_position, score)
        else:
            snake_position.pop()

        ### On collision kill the snake and display the final score.
        if collision_with_boundaries(snake_head) == 1 or collision_with_self(snake_position) == 1:
            font = cv2.FONT_HERSHEY_SIMPLEX
            img = np.zeros((BOARD_SIZE, BOARD_SIZE, 3), dtype='uint8')
            cv2.putText(img, 'Your Score is {}'.format(score), (140, 250),
                        font, 1, (255, 255, 255), 2, cv2.LINE_AA)
            cv2.imshow('a', img)
            cv2.waitKey(0)
            # cv2.imwrite('D:/downloads/ii.jpg', img)
            break

    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()
위 게임을 gym environment로 변환하기 위해 필요한 구조
import gymnasium
from gymnasium import spaces


class CustomEnv(gymnasium.Env):
    """Skeleton of a custom Environment that follows the gymnasium interface."""

    def __init__(self, arg1, arg2):
        # Replace arg1/arg2 with the constructor arguments your env needs.
        # (The original template's `(self, arg1, arg2, ...)` is not valid Python.)
        super().__init__()
        ...

    def step(self, action):
        # gymnasium's step() returns a 5-tuple:
        #   (observation, reward, terminated, truncated, info)
        ...

    def reset(self, seed=None, options=None):
        # gymnasium's reset() returns (observation, info)
        ...

    def render(self, mode='human'):
        ...

    def close(self):
        # Release any resources (windows, files, ...) held by the env.
        ...
"""Custom Environment that follows gym interface"""
import gymnasium
from gymnasium import spaces


# NOTE(review): the original aliased the base class (`env = gymnasium.Env`)
# and subclassed the alias, then called `env.close()` on the class object at
# module level; subclass gymnasium.Env directly and close instances instead.
class CustomEnv(gymnasium.Env):
    """Custom snake Environment that follows the gymnasium interface."""

    def __init__(self, arg1, arg2):
        super().__init__()
        # Define Action and Observation space :: one of the gym.spaces objects.

        ### Action : example when using discrete actions
        # self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS)
        # Snake has 4 actions: 0-Left, 1-Right, 3-Up, 2-Down
        self.action_space = spaces.Discrete(4)

        ### Obs : example for using an image as input
        # (channel-first; channel-last also works):
        # self.observation_space = spaces.Box(
        #     low=0, high=255,
        #     shape=(N_CHANNELS, HEIGHT, WIDTH), dtype=np.uint8
        # )
        #
        # For the snake env a numeric observation is used instead; head_x etc.
        # are computed in step()/reset(), not here (the original assigned this
        # list in __init__, where those names are undefined):
        # observation = [
        #     head_x, head_y,
        #     apple_delta_x, apple_delta_y,
        #     snake_length
        # ] + list(self.prev_actions)

        ### Reward :
        # self.total_reward = len(self.snake_position) - 3

    def step(self, action):
        ...
        # gymnasium returns a 5-tuple (the old gym API returned 4):
        return observation, reward, terminated, truncated, info

    def reset(self, seed=None, options=None):
        ...
        return observation, info  # reward/terminated/truncated can't be included

    # --------------------------------------------------------------------------
    def render(self, mode='human'):
        ...

    def close(self):
        ...
# ------------------------------------------------------------------------------
defining our action space and observation space.
action space
4 clear possible actions
self.action_space = spaces.Discrete(4)
observation space
게임이미지 보다는.. 어설프더라도 좀더 구체적인 수치.
뱀머리 위치, 사과 위치, 뱀몸통 위치,
observation = [head_x, head_y, apple_delta_x, apple_delta_y, snake_length] + list(self.prev_actions)
reward
뱀길이
self.total_reward = len(self.snake_position) - 3 # start length is 3
reset()
episode시작시 마다 (steps이 시작하기전에) 호출