[강화시스터즈 1기/프로젝트/환경세미나] 지뢰마스터즈 01팀

지뢰찾기 환경

예시

def __init__(self, params=None):
    """Example initializer: build the board when the object is created.

    Args:
        params: Placeholder for constructor parameters; not used by this
            example snippet.
    """
    # The board is produced by make_init_board() and stored on the instance.
    self.board = self.make_init_board()
     
# 중략

def make_init_board(self):
    """Create the two-layer board used at the start of a game.

    Returns:
        An object-dtype array of shape (2, self.nrows, self.ncols).
        Layer 0 is filled with ones (the "revealed_or_not" layer) and
        layer 1 holds the game board produced by seed_mines() followed by
        complete_actual_board().
    """
    # Layout: (revealed_or_not, game_board)
    layers = np.ones(shape=(2, self.nrows, self.ncols), dtype='object')
    layers[1] = self.complete_actual_board(self.seed_mines())
    return layers

Env

def __init__(self, height=9, width=9, num_mines=10):
    """Store the board dimensions and mine count, then start a game.

    Args:
        height: Number of rows on the board.
        width: Number of columns on the board.
        num_mines: Number of mines to place.
    """
    self.height, self.width, self.num_mines = height, width, num_mines
    # reset() builds the fields for the first game.
    self.reset()
def reset(self):
    """Start a new game and return the player's view as a single-row array.

    Rebuilds ``self.minefield`` (the answer: -1 for a mine, otherwise the
    number of adjacent mines) and ``self.playerfield`` (the player's view:
    9 for a hidden cell), clears the ``exploded``/``done`` flags, and
    uncovers one random safe cell.

    ``self.playerfield`` keeps its (height, width) shape so that ``[x, y]``
    indexing keeps working after a reset; only the returned observation is
    reshaped.

    Returns:
        The player's view reshaped to (1, height * width).

    Raises:
        ValueError: If ``num_mines`` leaves no safe cell. Random placement
            or the safe-start search would otherwise loop forever.
    """
    total_cells = self.height * self.width
    if self.num_mines >= total_cells:
        raise ValueError(
            f"num_mines ({self.num_mines}) must be smaller than the number "
            f"of cells ({total_cells})"
        )

    # Initialize fields
    self.minefield = np.zeros((self.height, self.width), dtype=int)  # Correct answer
    self.playerfield = np.full((self.height, self.width), 9)  # Player's view
    self.exploded = False
    self.done = False

    # Place mines: keep sampling until enough distinct cells are chosen.
    mines = set()
    while len(mines) < self.num_mines:
        cell = (random.randint(0, self.height - 1), random.randint(0, self.width - 1))
        mines.add(cell)
        self.minefield[cell] = -1

    # Calculate numbers: every mine adds 1 to each neighbouring non-mine cell.
    for (x, y) in mines:
        for i in range(max(0, x - 1), min(self.height, x + 2)):
            for j in range(max(0, y - 1), min(self.width, y + 2)):
                if self.minefield[i, j] != -1:
                    self.minefield[i, j] += 1

    # Uncover a safe cell to start
    while True:
        sx, sy = random.randint(0, self.height - 1), random.randint(0, self.width - 1)
        if self.minefield[sx, sy] != -1:
            self._uncover(sx, sy)
            break

    # Reshape only the returned observation, not the stored board.
    return self.playerfield.reshape(-1, total_cells)
def step(self, action_idx):
    """Apply one action: uncover the cell addressed by a flat index.

    Args:
        action_idx: Flat cell index; converted to (row, col) with
            ``divmod(action_idx, self.width)``.

    Returns:
        A tuple ``(playerfield, reward, exploded, done)``. Note the order:
        ``exploded`` comes before ``done``. The reward is -10 for uncovering
        a mine, 1 for uncovering a safe hidden cell, 0 when the cell was
        already uncovered, and 10 when the number of hidden cells equals
        ``num_mines`` without an explosion.
    """
    row, col = divmod(action_idx, self.width)
    reward = 0

    # Only hidden cells (value 9) react to an action.
    if self.playerfield[row, col] == 9:
        self._uncover(row, col)
        revealed = self.playerfield[row, col]
        if revealed == -1:
            reward = -10
            self.exploded = self.done = True
        else:
            # A cell with no adjacent mines also opens its surroundings.
            if revealed == 0:
                self.auto_reveal_blocks(row, col)
            reward = 1
            self.exploded = self.done = False

    # Win: only the mines are still hidden.
    hidden_left = np.sum(self.playerfield == 9)
    if hidden_left == self.num_mines and not self.exploded:
        self.done = True
        reward = 10

    return self.playerfield, reward, self.exploded, self.done
def _uncover(self, x, y):
    """Copy the value at (x, y) from the minefield into the player's view.

    Returns:
        ``self.playerfield`` after the cell has been updated.
    """
    cell = (x, y)
    self.playerfield[cell] = self.minefield[cell]
    return self.playerfield

주어진 x, y 위치의 타일을 self.minefield 에서 가져와 self.playerfield에 반영하고 반환한다.

def auto_reveal_blocks(self, x, y):
    """Flood-reveal the area around (x, y) using breadth-first search.

    Starting from (x, y), every still-hidden cell (value 9) in the 3x3
    neighbourhood of the current cell is uncovered. An uncovered cell whose
    minefield value is 0 is queued so its own neighbourhood is opened too.

    Args:
        x: Row index of the starting cell.
        y: Column index of the starting cell.
    """
    from collections import deque

    # deque gives O(1) pops from the left; list.pop(0) is O(n).
    queue = deque([(x, y)])
    while queue:
        cx, cy = queue.popleft()
        # Clamp the 3x3 window to the board edges.
        for i in range(max(0, cx - 1), min(self.height, cx + 2)):
            for j in range(max(0, cy - 1), min(self.width, cy + 2)):
                if self.playerfield[i, j] == 9:
                    self._uncover(i, j)
                    if self.minefield[i, j] == 0:
                        queue.append((i, j))

시작 타일인 (x, y)를 큐에 추가하고, 큐가 빌 때까지 큐의 첫 번째 요소를 꺼내 cx, cy에 할당한다. 이후 (cx, cy) 주변 타일 중 아직 숨겨진(값이 9인) 타일을 공개하고, 공개한 타일의 값이 0이면 그 타일도 큐에 추가해 탐색을 이어간다 (BFS).

https://velog.io/@keum0821/그래프Graph-BFS의-개념과-구현

def render(self):
    """Print the player's current view of the board as text.

    Hidden cells (9) are shown as ``H``, mines (-1) as ``M``, and every
    other cell as its number. Each cell is followed by a space and each row
    by a newline.

    Side effect: ``self.playerfield`` is reshaped to (height, width).
    """
    # The board may arrive flattened to one row; restore the 2-D shape once
    # so [x, y] indexing works here and for later callers.
    self.playerfield = self.playerfield.reshape(self.height, self.width)

    rows = []
    for x in range(self.height):
        cells = []
        for y in range(self.width):
            value = self.playerfield[x, y]
            if value == 9:
                cells.append('H ')  # H for hidden
            elif value == -1:
                cells.append('M ')  # M for mine
            else:
                cells.append(f'{value} ')
        rows.append(''.join(cells) + '\n')
    print(''.join(rows))

self.playerfield 를 기반으로 현재 사용자의 필드를 문자열로 만들어 출력하는 메서드

if __name__ == "__main__":
    # Demo: play random moves until the episode ends, rendering each state.
    env = MinesweeperEnv()
    state = env.reset()
    env.render()

    done = False
    while not done:
        # Random action for demonstration
        action = random.randint(0, env.width * env.height - 1)
        # step() returns (playerfield, reward, exploded, done). Unpack in
        # that order so the loop also stops on a win, not only on a mine.
        state, reward, exploded, done = env.step(action)
        env.render()

auto_reveal_blocks 나 종료조건이 잘 실행되는지 확인

```




Comments