# Using Custom Environments – pt.3

https://pythonprogramming.net/custom-environment-reinforcement-learning-stable-baselines-3-tutorial/

(OpenAI의 gym Environment 대신에) 자신의 custom Environment에서 RL을 수행.

어떤 Environment를 gym Environment에 맞게 변환해 주면 된다. (말이 쉽지 구현이 까다롭다….)

RL에서 가장 까다로운 2가지
– Environment의 observation을 가져오는 일 (data engineering)
– Agent에 적절한 rewards부여하기

Game = 그 자체가 가장 좋은 Environments = 뱀게임 (source)

```# https://theailearner.com/2019/03/10/creating-a-snake-game-using-opencv-python/
# https://github.com/TheAILearner/Snake-Game-using-OpenCV-Python/blob/master/snake_game_using_opencv.ipynb

import numpy as np
import cv2  # imshow(), waitKey(), rectangle(), putText()
import random
import time

### Game Rules:
# 1. Die when the snake head leaves the 500x500 board.
def collision_with_boundaries(snake_head):
    """Return 1 if the head [x, y] is outside the 500x500 window, else 0."""
    if (snake_head[0] >= 500 or snake_head[0] < 0
            or snake_head[1] >= 500 or snake_head[1] < 0):
        return 1  # true
    else:
        return 0

# 2. Die when the head runs into the snake's own body.
def collision_with_self(snake_position):
    """Return 1 if the head (first segment) overlaps any body segment, else 0."""
    snake_head = snake_position[0]
    if snake_head in snake_position[1:]:
        return 1
    else:
        return 0

# 3-1. Score increases and apple is moved to new position
def collision_with_apple(apple_position, score):
    """Return a new random apple position (grid-aligned to 10px) and score + 1."""
    score += 1
    apple_position = [random.randrange(1, 50) * 10,
                      random.randrange(1, 50) * 10]
    return apple_position, score

### Game Window :: Display game objects
img = np.zeros((500,500, 3),dtype='uint8')

### Apple and Snake - Initial positions :: Display game objects
apple_position = [random.randrange(1,50)*10, random.randrange(1,50)*10]
snake_position = [[250,250],[240,250],[230,250]]

score = 0
prev_button_direction = 1
button_direction = 1

while True:
cv2.imshow('a',img)
cv2.waitKey(1)
img = np.zeros((500,500,3), dtype='uint8')

### Apple and Snake :: Display game objects
cv2.rectangle(img,(apple_position[0], apple_position[1]), (apple_position[0]+10,apple_position[1]+10), (0,0,255), 3)
for position in snake_position:
cv2.rectangle(img,(position[0],position[1]), (position[0]+10,position[1]+10),(0,255,0),3)

# Takes step after fixed time
t_end = time.time() + 0.06
k = -1
while time.time() < t_end:
if k == -1:
k = cv2.waitKey(6)
else:
continue

# 0-Left, 1-Right, 3-Up, 2-Down, q-Break
# a-Left, d-Right, w-Up, s-Down
if   k == ord('a') and prev_button_direction != 1:
button_direction = 0
elif k == ord('d') and prev_button_direction != 0:
button_direction = 1
elif k == ord('w') and prev_button_direction != 2:
button_direction = 3
elif k == ord('s') and prev_button_direction != 3:
button_direction = 2
elif k == ord('q'):
break
else:
button_direction = button_direction
prev_button_direction = button_direction

# Change the head position based on the button direction
if button_direction == 1:
elif button_direction == 0:
elif button_direction == 2:
elif button_direction == 3:

# 3-2. Increase Snake length on eating apple
apple_position, score = collision_with_apple(apple_position, score)
else:
snake_position.pop()

### Displaying the final Score
### On collision kill the snake and print the score
if collision_with_boundaries(snake_head) == 1 or collision_with_self(snake_position) == 1:
font = cv2.FONT_HERSHEY_SIMPLEX
img = np.zeros((500,500,3), dtype='uint8')
cv2.putText(img,'Your Score is {}'.format(score),(140,250), font, 1,(255,255,255),2,cv2.LINE_AA)
cv2.imshow('a',img)
cv2.waitKey(0)
break

cv2.destroyAllWindows()```

위 게임을 gym environment로 변환하기 위해 필요한 구조

```import gymnasium
from gymnasium import spaces

class CustomEnv(gymnasium.Env):
    def __init__(self, arg1, arg2, ...):
        super(CustomEnv, self).__init__()
        ...
    def step(self, action):
        ...

    def reset(self):
        ...

    def render(self, mode='human'):
        ...

    def close(self):
        ...

env.close()```

"""Custom Environment that follows gym interface"""

```import gymnasium
from gymnasium import spaces

env = gymnasium.Env
# env.reset()

class CustomEnv(env):
    def __init__(self, arg1, arg2):
        super(CustomEnv, self).__init__()
        # Define Action and Observation space :: gym.spaces objects 중 하나

        ### Action : Example when using discrete actions
        # self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS)
        # 0-Left, 1-Right, 3-Up, 2-Down
        self.action_space = spaces.Discrete(4)

        ### Obs : Example for using image as input (channel-first; channel-last also works):
        # self.observation_space = spaces.Box(
        #     low=0, high=255,
        #     shape=(N_CHANNELS, HEIGHT, WIDTH), dtype=np.uint8
        # )
        observation = [
            apple_delta_x, apple_delta_y,
            snake_length
        ] + list(self.prev_actions)

        ### Reward :
        # self.total_reward = len(self.snake_position) - 3

    def step(self, action):
        ...
        return observation, reward, done, info

    def reset(self):
        ...
        return observation  # reward, done, info can't be included

# --------------------------------------------------------------------------
    def render(self, mode='human'):
        ...

    def close(self):
        ...
# ------------------------------------------------------------------------------
env.close()```

defining our action space and observation space.

## action space

4 clear possible actions

`self.action_space = spaces.Discrete(4)`

## observation space

게임이미지 보다는.. 어설프더라도 좀더 구체적인 수치.

뱀머리 위치, 사과 위치, 뱀몸통 위치,

```observation = [head_x, head_y,
apple_delta_x, apple_delta_y,
snake_length] + list(self.prev_actions)```

## reward

뱀길이

`self.total_reward = len(self.snake_position) - 3  # start length is 3`

## reset()

episode시작시 마다 (steps이 시작하기전에) 호출

Categories: RL

Blog Owner

Subscribe
Notify of