Using Custom Environments – pt.3
https://pythonprogramming.net/custom-environment-reinforcement-learning-stable-baselines-3-tutorial/
(OpenAI의 gym Environment 대신에) 자신의 custom Environment에서 RL을 수행.
어떤 Environment를 gym Environment에 맞게 변환해 주면 된다. (말이 쉽지 구현이 까다롭다….)
RL에서 가장 까다로운 2가지
– Environment의 observation을 가져오는 일 (data engineering)
– Agent에 적절한 rewards부여하기
Game = 그 자체가 가장 좋은 Environments = 뱀게임 (source)
# https://theailearner.com/2019/03/10/creating-a-snake-game-using-opencv-python/
# https://github.com/TheAILearner/Snake-Game-using-OpenCV-Python/blob/master/snake_game_using_opencv.ipynb
import numpy as np
import cv2 # imshow(), waitKey(), rectangle(), putText()
import random
import time
### Game Rules:
# 1. die
def collision_with_boundaries(snake_head, grid_size=500):
    """Return 1 when the snake's head is outside the board, else 0.

    Parameters
    ----------
    snake_head : sequence of two ints
        (x, y) pixel position of the head.
    grid_size : int, optional
        Width/height of the square board in pixels (default 500, matching
        the 500x500 game window created below).

    Returns
    -------
    int
        1 (dead) or 0 (alive) -- integer flags rather than booleans, to
        match the style of the other collision helpers in this file.
    """
    x, y = snake_head[0], snake_head[1]
    if x >= grid_size or x < 0 or y >= grid_size or y < 0:
        return 1
    return 0
# 2. die
def collision_with_self(snake_position):
    """Return 1 when the snake's head overlaps any body segment, else 0.

    The head is the first element of ``snake_position``; the remaining
    elements are the body. Integer flags (1/0) keep the return style
    consistent with the other collision helpers.
    """
    head, *body = snake_position
    return 1 if head in body else 0
# 3-1. Score increases and apple is moved to new position
def collision_with_apple(apple_position, score):
    """Respawn the apple at a random grid cell and add one to the score.

    Returns the new ``(apple_position, score)`` pair. Coordinates are
    multiples of 10 in [10, 490], i.e. aligned to the 10-px game grid.

    NOTE(review): the incoming ``apple_position`` is never read -- the
    parameter is kept only so existing call sites stay unchanged.
    """
    new_apple = [random.randrange(1, 50) * 10 for _ in range(2)]
    return new_apple, score + 1
### Game window and initial state: 500x500 board made of 10-px grid cells.
# Black canvas each frame is drawn onto.
img = np.zeros((500, 500, 3), dtype='uint8')
# Apple spawns on a random grid-aligned cell (coords are multiples of 10 in [10, 490]).
apple_position = [random.randrange(1, 50) * 10, random.randrange(1, 50) * 10]
# Snake starts 3 segments long at the board centre, laid out head-first toward the right.
snake_position = [[250 - 10 * i, 250] for i in range(3)]
score = 0
# Direction codes: 0-Left, 1-Right, 2-Down, 3-Up; the snake starts moving right.
prev_button_direction = 1
button_direction = 1
snake_head = [250, 250]
# Main game loop: one iteration = one frame / one snake step.
while True:
    # Show the frame rendered on the previous iteration, then start a fresh one.
    cv2.imshow('a',img)
    cv2.waitKey(1)
    img = np.zeros((500,500,3), dtype='uint8')
    # Draw the apple (red outline) and every snake segment (green outline);
    # each object occupies one 10x10-pixel grid cell.
    cv2.rectangle(img,(apple_position[0], apple_position[1]), (apple_position[0]+10,apple_position[1]+10), (0,0,255), 3)
    for position in snake_position:
        cv2.rectangle(img,(position[0],position[1]), (position[0]+10,position[1]+10),(0,255,0),3)
    # Takes step after fixed time: poll the keyboard once, then spin out the
    # remainder of the ~60 ms frame so the snake advances at a constant rate.
    t_end = time.time() + 0.06
    k = -1
    while time.time() < t_end:
        if k == -1:
            # waitKey returns -1 when no key was pressed within the timeout.
            k = cv2.waitKey(6)
        else:
            continue
    # Key dispatch. Direction codes: 0-Left, 1-Right, 3-Up, 2-Down, q-Break.
    # Keys: a-Left, d-Right, w-Up, s-Down. A 180-degree reversal is ignored
    # (e.g. 'a' while already moving Right) since the snake cannot back into
    # itself; any other key keeps the current direction.
    if k == ord('a') and prev_button_direction != 1:
        button_direction = 0
    elif k == ord('d') and prev_button_direction != 0:
        button_direction = 1
    elif k == ord('w') and prev_button_direction != 2:
        button_direction = 3
    elif k == ord('s') and prev_button_direction != 3:
        button_direction = 2
    elif k == ord('q'):
        # 'q' quits the game loop immediately.
        break
    else:
        # No (valid) key pressed: keep moving in the current direction.
        button_direction = button_direction
    prev_button_direction = button_direction
    # Change the head position based on the button direction (10 px per step).
    if button_direction == 1:
        snake_head[0] += 10
    elif button_direction == 0:
        snake_head[0] -= 10
    elif button_direction == 2:
        snake_head[1] += 10
    elif button_direction == 3:
        snake_head[1] -= 10
    # Increase snake length on eating the apple: grow by pushing the new head
    # and keeping the tail; otherwise move by pushing the head and popping
    # the tail. A copy of snake_head is inserted so later in-place updates to
    # snake_head do not mutate the stored segment.
    if snake_head == apple_position:
        apple_position, score = collision_with_apple(apple_position, score)
        snake_position.insert(0,list(snake_head))
    else:
        snake_position.insert(0,list(snake_head))
        snake_position.pop()
    # On collision (wall or self) kill the snake: show the final score on a
    # blank frame and wait for any key press before leaving the game loop.
    if collision_with_boundaries(snake_head) == 1 or collision_with_self(snake_position) == 1:
        font = cv2.FONT_HERSHEY_SIMPLEX
        img = np.zeros((500,500,3), dtype='uint8')
        cv2.putText(img,'Your Score is {}'.format(score),(140,250), font, 1,(255,255,255),2,cv2.LINE_AA)
        cv2.imshow('a',img)
        cv2.waitKey(0)
        #cv2.imwrite('D:/downloads/ii.jpg',img)
        break
cv2.destroyAllWindows()
위 게임을 gym environment로 변환하기 위해 필요한 구조
import gymnasium
from gymnasium import spaces

class CustomEnv(gymnasium.Env):
    def __init__(self, arg1, arg2, ...):
        super(CustomEnv, self).__init__()
        ...
    def step(self, action):
        ...
    def reset(self):
        ...
    def render(self, mode='human'):
        ...
    def close(self):
        ...

env.close()
"""Custom Environment that follows gym interface"""
import gymnasium
from gymnasium import spaces

env = gymnasium.Env  # env.reset()

class CustomEnv(env):
    def __init__(self, arg1, arg2):
        super(CustomEnv, self).__init__()
        # Define Action and Observation space :: gym.spaces objects 중 하나
        ### Action : Example when using discrete actions
        # self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS)
        # 0-Left, 1-Right, 3-Up, 2-Down
        self.action_space = spaces.Discrete(4)
        ### Obs : Example for using image as input (channel-first; channel-last also works):
        # self.observation_space = spaces.Box(
        #     low=0, high=255,
        #     shape=(N_CHANNELS, HEIGHT, WIDTH), dtype=np.uint8
        # )
        observation = [
            head_x, head_y,
            apple_delta_x, apple_delta_y,
            snake_length
        ] + list(self.prev_actions)
        ### Reward :
        # self.total_reward = len(self.snake_position) - 3

    def step(self, action):
        ...
        return observation, reward, done, info

    def reset(self):
        ...
        return observation  # reward, done, info can't be included
    # --------------------------------------------------------------------------

    def render(self, mode='human'):
        ...
    def close(self):
        ...
# ------------------------------------------------------------------------------
env.close()
defining our action space and observation space.
action space
4 clear possible actions
self.action_space = spaces.Discrete(4)
observation space
게임이미지 보다는.. 어설프더라도 좀더 구체적인 수치.
뱀머리 위치, 사과 위치, 뱀몸통 위치,
observation = [head_x, head_y, apple_delta_x, apple_delta_y, snake_length] + list(self.prev_actions)
reward
뱀길이
self.total_reward = len(self.snake_position) - 3 # start length is 3
reset()
episode시작시 마다 (steps이 시작하기전에) 호출