[강화시스터즈 1기/프로젝트/환경세미나] 폭탄제거부대 01팀

init

| input | 그리드월드 사이즈, 지뢰 개수 |
| --- | --- |

def __init__(self, gridworld_size:Tuple, num_mine:int):
    """Create a minesweeper grid world with randomly placed mines.

    Args:
        gridworld_size: ``(nrow, ncol)`` of the board.
        num_mine: Number of mines to place. ``random.sample`` raises
            ``ValueError`` when this exceeds the number of cells.
    """
    self.gridworld_size = gridworld_size
    self.nrow, self.ncol = self.gridworld_size
    self.num_mine = num_mine

    # Every board coordinate as a (row, col) tuple, in row-major order.
    # This list doubles as the action space.
    self.points = [(i, j) for i in range(self.nrow) for j in range(self.ncol)]
    self.num_actions = len(self.points)

    # Reward table used by step().
    self.reward_dict = {'mine':-10, 'empty':1, 'clear':10}

    self.points_unvisit = self.points.copy()

    # Place the mines at random, distinct coordinates.
    self.mine_points = random.sample(self.points, self.num_mine)

    # Build gridworld / mine_bool / map_answer / present_state / visited_map
    # from mine_points. gridworld_reset() performs exactly the construction
    # that used to be duplicated here.
    self.gridworld_reset()

맵 생성 관련 함수

check_mine

def check_mine(self, coord:Tuple):
    """Return the minesweeper number for one cell.

    Args:
        coord: ``(row, col)`` of the cell to inspect.

    Returns:
        ``-1`` when the cell itself holds a mine, otherwise the number of
        mines among its in-bounds 8-connected neighbours.
    """
    row, col = coord

    # A mine cell is marked with -1 instead of a neighbour count.
    if self.mine_bool[row, col]:
        return -1

    neighbour_mines = 0
    for d_row in (-1, 0, 1):
        for d_col in (-1, 0, 1):
            if d_row == 0 and d_col == 0:
                continue  # skip the cell itself
            n_row, n_col = row + d_row, col + d_col
            in_bounds = 0 <= n_row < self.nrow and 0 <= n_col < self.ncol
            if in_bounds and self.mine_bool[n_row, n_col]:
                neighbour_mines += 1
    return neighbour_mines

# Excerpt of the check_mine body; `self` and `coord` come from the
# enclosing method.
# Offsets of the eight neighbouring cells (4 orthogonal, then 4 diagonal).
directions = [(-1, 0), (1, 0), (0, -1), (0, 1),
              (-1, -1), (-1, 1), (1, -1), (1, 1)]
x, y = coord
result = 0
if self.mine_bool[x,y]:
    # The cell itself is a mine: mark it with -1.
    result = -1
else:
    # Count the mines among the in-bounds neighbours.
    for dx, dy in directions:
        nx, ny = x + dx, y + dy
        if 0 <= nx < self.nrow and 0 <= ny < self.ncol:
            if self.mine_bool[nx, ny]:
                result += 1

bfs_minesweeper

def bfs_minesweeper(self, clicked_point:Tuple):
    """Reveal cells from a clicked cell using breadth-first search.

    The clicked cell is revealed. Whenever a revealed cell has zero
    neighbouring mines, its eight in-bounds neighbours are revealed too.

    Args:
        clicked_point: ``(row, col)`` of the clicked cell.

    Returns:
        A copy of ``self.present_state`` in which every newly reached cell
        holds its ``self.map_answer`` value. ``self.present_state`` itself
        is not modified here; ``self.visited_map`` is replaced with the
        updated visited mask.
    """
    queue = deque([clicked_point])
    directions = [(-1, 0), (1, 0), (0, -1), (0, 1),
                  (-1, -1), (-1, 1), (1, -1), (1, 1)]

    # Work on copies so the stored maps change only once the search is done.
    visited = self.visited_map.copy()
    result = self.present_state.copy()

    while queue:
        x, y = queue.popleft()
        # A cell can be queued several times before it is processed.
        if visited[x, y]:
            continue
        visited[x, y] = True
        result[x, y] = self.map_answer[x, y]

        # Only cells with no neighbouring mines spread the reveal.
        if self.map_answer[x,y] == 0:
            for dx, dy in directions:
                nx, ny = x + dx, y + dy
                if 0 <= nx < self.nrow and 0 <= ny < self.ncol and not visited[nx, ny]:
                    queue.append((nx, ny))

    # Publish the updated visited mask once, after the search has finished.
    self.visited_map = visited

    return result


# Excerpt of the bfs_minesweeper body; `self` and `clicked_point` come from
# the enclosing method.
# Offsets of the eight neighbouring cells (4 orthogonal, then 4 diagonal).
directions = [(-1, 0), (1, 0), (0, -1), (0, 1),
                (-1, -1), (-1, 1), (1, -1), (1, 1)]
# Work on copies of the visited mask and of the current state.
visited = self.visited_map.copy()
result = self.present_state.copy()
# The search starts from the clicked cell.
queue = deque([clicked_point])
while queue:
        x, y = queue.popleft()
        # Skip cells that were already revealed.
        if visited[x, y]:
            continue
        visited[x, y] = True
        # Reveal the cell by copying its value from the answer map.
        result[x, y] = self.map_answer[x, y]

        # Only cells with zero neighbouring mines queue their neighbours.
        if self.map_answer[x,y] == 0:
            for dx, dy in directions:
                nx, ny = x + dx, y + dy
                if 0 <= nx < self.nrow and 0 <= ny < self.ncol and not visited[nx, ny]:
                    queue.append((nx, ny))
# Store the updated visited mask once the queue is empty.
self.visited_map = visited

gridworld_reset

def gridworld_reset(self):
    """Rebuild every derived map from the current ``self.mine_points``.

    Sets ``self.gridworld`` (``'M'`` on mines, ``'.'`` elsewhere),
    ``self.mine_bool`` (True on mines), ``self.map_answer`` (value of
    ``self.check_mine`` for every cell), ``self.present_state`` (all ``-2``,
    i.e. nothing revealed) and ``self.visited_map``.
    """
    shape = (self.nrow, self.ncol)

    # Character board used for rendering: 'M' marks a mine.
    board = np.full(shape=shape, fill_value=".")
    for row, col in self.mine_points:
        board[row, col] = 'M'
    self.gridworld = board

    # Boolean mask that is True exactly where a mine sits.
    self.mine_bool = (board == 'M')

    # Answer map: neighbouring-mine count per cell, as returned by check_mine.
    self.map_answer = np.zeros(self.gridworld_size)
    for row, col in self.points:
        self.map_answer[row, col] = self.check_mine((row, col))

    # -2 marks a cell that has not been revealed yet.
    self.present_state = np.full(shape, -2)

    # A cell counts as visited once its state equals its answer value.
    self.visited_map = (self.present_state == self.map_answer)

move_mine

def move_mine(self, action:Tuple):
    """Relocate the mine at ``action`` to a random mine-free cell.

    Used so that the first click of an episode never lands on a mine.
    All derived maps are rebuilt afterwards via ``gridworld_reset``.

    Args:
        action: ``(row, col)`` of the mine to move.

    Raises:
        ValueError: If ``action`` is not a mine location, or if there is no
            mine-free cell to move the mine to.
    """
    if action not in self.mine_points:
        raise ValueError(f"{action} is not a mine location")

    empty_points = list(set(self.points) - set(self.mine_points))
    new_mine = random.sample(empty_points, 1)[0]

    # Build a new list instead of mutating the current one in place: the
    # current list may be shared with a pool of training layouts, which
    # must stay intact.
    self.mine_points = [p for p in self.mine_points if p != action] + [new_mine]

    self.gridworld_reset()

# Excerpt of the move_mine body; `self` and `action` come from the
# enclosing method.
# Cells that currently hold no mine.
empty_points = list(set(self.points) - set(self.mine_points))
# Pick one of them at random (a one-element list).
new_mine = random.sample(empty_points, 1)
# Replace the clicked mine with the new location; both calls mutate the
# mine_points list in place.
self.mine_points.remove(action)
self.mine_points.append(new_mine[0])

# Rebuild the maps derived from mine_points.
self.gridworld_reset()

step

def step(self, action:Tuple):
    """Apply one click to the board and return the transition.

    Args:
        action: ``(row, col)`` of the cell to click.

    Returns:
        ``(next_state, reward, done, clear)`` where ``next_state`` is the
        board after the click, ``reward`` comes from ``self.reward_dict``,
        ``done`` tells whether the episode ended and ``clear`` whether it
        ended in success.
    """
    # First action of the episode (nothing revealed yet): if the clicked
    # cell holds a mine, move that mine elsewhere.
    if np.sum(self.visited_map) == 0:
        if action in self.mine_points:
            self.move_mine(action)

    # State that results from the click.
    next_state = self.bfs_minesweeper(action)

    # Visited mask that corresponds to the new state.
    self.visited_map = (next_state == self.map_answer)

    if action in self.mine_points:
        # Stepped on a mine: the game is lost, the episode ends with the
        # negative reward, and it can never count as a clear.
        reward = self.reward_dict['mine']
        done = True
        clear = False
    else:
        # The game is won once the number of still-unrevealed cells equals
        # the number of mines.
        unrevealed = np.sum(~self.visited_map)
        clear = bool(unrevealed == self.num_mine)
        if clear:
            reward = self.reward_dict['clear']
            done = True
        else:
            reward = self.reward_dict['empty']
            done = False

    # Commit the new state.
    self.present_state = next_state

    return next_state, reward, done, clear

# Excerpt of the step body; `self` and `action` come from the enclosing
# method.
# First action (nothing revealed yet): move a mine away from the clicked cell.
if np.sum(self.visited_map) == 0 :
    if action in self.mine_points:
        self.move_mine(action)
# State that results from the click.
next_state = self.bfs_minesweeper(action)
if action in self.mine_points:
    # Stepped on a mine: the episode ends with the negative reward.
    reward = self.reward_dict['mine']
    done = True

else :
    reward = self.reward_dict['empty']
    done = False
# Success: no mine was hit and the number of unrevealed cells equals the
# number of mines.
if not done and np.sum(~self.visited_map) == self.num_mine:
    reward = self.reward_dict['clear']
    clear = True # used to tell a successful ending apart
    done = True

else :
    clear = False
self.present_state = next_state

reset

def reset(self):
    """Start a new game with a freshly sampled mine layout.

    Draws ``self.num_mine`` distinct coordinates from ``self.points`` and
    rebuilds every derived map through ``gridworld_reset``.
    """
    fresh_mines = random.sample(self.points, self.num_mine)
    self.mine_points = fresh_mines
    self.gridworld_reset()

렌더링 관련 함수

render

def render(self, state):
    """Display a board state as a colour-styled table.

    Args:
        state: Board indexed as ``state[row, col]``. ``-2`` marks an
            unrevealed cell (shown as ``"."``), ``-1`` a mine (shown as
            ``"M"``); any other value is written into the cell as-is.
    """
    cells = np.full(shape=(self.nrow, self.ncol), fill_value=".")

    for point in self.points:
        value = state[point]
        if value == -1:
            cells[point] = "M"
        elif value != -2:
            # Revealed number: stored into the string array, which holds
            # it as text.
            cells[point] = value

    # Colour every cell through render_color and show the styled table.
    styled = pd.DataFrame(cells).style.applymap(self.render_color)
    display(styled)

# Excerpt of the render body; `self` and `state` come from the enclosing
# method.
# Start from a board in which every cell shows ".".
render_state = np.full(shape=(self.nrow, self.ncol), fill_value=".")

for (i,j) in self.points:
    # -2 is left as "." (unrevealed cell).
    if state[i,j] == -2:
        continue
    # -1 is shown as a mine.
    elif state[i,j] == -1:
        render_state[i,j] = "M"
    # Any other value is written into the cell as-is.
    else:
        render_state[i,j] = state[i,j]
# Wrap in a DataFrame, colour each cell via render_color, and display it.
render_state = pd.DataFrame(render_state)
render_state = render_state.style.applymap(self.render_color)
display(render_state)

render_answer

def render_answer(self):
    """Display the full answer map (mines and neighbour counts).

    Mines (``-1`` in ``self.map_answer``) are shown as ``"M"``; every other
    cell shows its neighbouring-mine count.
    """
    render_state = np.full(shape=(self.nrow, self.ncol), fill_value=".")

    for (i,j) in self.points:
        if self.map_answer[i,j] == -1:
            render_state[i,j] = "M"
        else:
            # map_answer holds floats; convert through int so the text is
            # "3" rather than "3.0". The cell text must match the
            # single-character keys used by render_color, and this no
            # longer depends on the string array cutting "3.0" down to "3".
            render_state[i,j] = str(int(self.map_answer[i,j]))

    # Colour every cell through render_color and show the styled table.
    render_state = pd.DataFrame(render_state)
    render_state = render_state.style.applymap(self.render_color)
    display(render_state)

render_color

def render_color(self, var):
    """Return the CSS colour declaration for one rendered cell.

    Args:
        var: Cell text: a digit ``'0'``-``'8'``, ``'M'`` or ``'.'``.

    Returns:
        A string of the form ``"color: <name>"``.

    Raises:
        KeyError: If ``var`` is not one of the known cell texts.
    """
    palette = {
        '0': 'black',
        '1': "skyblue",
        '2': 'lightgreen',
        '3': 'red',
        '4': 'violet',
        '5': 'brown',
        '6': 'turquoise',
        '7': 'grey',
        '8': 'black',
        'M': 'white',
        '.': 'black',
    }
    return "color: " + palette[var]

train 관련 함수

sample_10

def sample_10(self):
    """Draw ten random mine layouts.

    Returns:
        A list of ten layouts, each a list of ``self.num_mine`` distinct
        coordinates sampled from ``self.points``.

    Note:
        ``self.mine_points`` is left bound to the last layout drawn (the
        same list object as the final element of the result). The derived
        maps are not rebuilt here.
    """
    layouts = [random.sample(self.points, self.num_mine) for _ in range(10)]
    self.mine_points = layouts[-1]
    return layouts

train_reset

def train_reset(self, samples:list):
    """Start a new game on a mine layout picked at random from ``samples``.

    Args:
        samples: Pool of mine layouts, each a list of ``(row, col)``
            coordinates.

    Raises:
        ValueError: If ``samples`` is empty (raised by ``random.sample``).
    """
    chosen = random.sample(samples, 1)[0]
    # Take a copy so that later in-place edits of self.mine_points can never
    # alter the layout stored in the pool.
    self.mine_points = list(chosen)
    self.gridworld_reset()



Comments