# (be prepared for extreme code sloppiness)
# strategies:
# 1. search - graph alpha beta
# 2. variable search depths based on how well the agent's doing on time. that
#    required having the agent time and track his moves.
# 3. (somewhat-and-yet-to-be-proven-)helpful time management
# 4. kinda complicated board evaluation that evaluates start-to-midgame boards
#    differently than endgame boards and takes a bunch of differentials into account:
#    a. different types of squares (a,b,c,x,etc) differential
#    b. stable squares differential (this isn't entirely true...i wish i knew
#       how to efficiently count the number of stable squares...for now what i do is
#       check if there are any occupied corner squares, and then count all squares
#       connected to that corner square along either edge as stable. this is dumb
#       because it excludes stable squares that aren't on an edge from being
#       counted :( but i can't figure out how to compute the true number of
#       stable squares...)
#    c. mobility...er...i couldn't figure out how to do a differential since you
#       only know the number of moves available to you...
# 5. also tried to order the initial set of actions considered for graph alpha beta
#    intelligently by running a low-depth search first and then doing the full-depth
#    search with the returned move evaluated first...hopefully this would make the
#    second alpha beta search faster...but that didn't seem to be the case, so
#    ditched that and opted to order moves by location (corner before a-sq before
#    all else). that seemed to work well
# 6. a very very very mini opening-book :P look at the first few lines in getAction
#    and you'll see what i mean.
# overall, i spent most of my time after implementing the above trying to find
# bottlenecks manually. that was really dumb because some of the things i thought
# would help made things worse, often for reasons beyond my grasp. the range17
# variable is an example of something that actually helped a tiny amount.
# thanks for listening.
# -akuma

import games
import random
import util
from os import times as t

# One Bond one-liner is printed after every DoubleAgent move.
quotes = [
    'Do you expect me to talk?',
    'Shocking. Positively shocking.',
    'I can think of something more sociable to do.',
    'Oh please don\'t, not on my account.',
    'Yeah, looks like he came to a dead end.',
    'The world is not enough.',
    'I thought Christmas only comes once a year.',
    'Can I expect the pleasure of you in Iceland?',
    'Do I look like I give a damn?',
    'I think he got the point.',
    'Well, let me try and enlarge your vocabulary.',
    # fixed broken escape: '\m' was a literal backslash-m, not an apostrophe
    'I\'m afraid you\'ve caught me with more than my hands up.',
    'Well, I don\'t want to risk losing me either.',
    'Get your clothes on and I\'ll buy you an ice cream.',
    'Now I shall be all alone.',
    'Now there\'s a name to die for.',
    'You burned me, and now you want my help? ',
    'Why do people who can\'t take advice always insist on giving it?',
    'Did you bring any chocolates?',
    'I always thought M was a randomly assigned letter. I had no idea it stood for ...',
    'I\'m sorry I\'m not sorry.',
    'If you had just been born wouldn\'t you be naked?',
    'I won\'t consider myself to be in trouble until I start weeping blood.',
    'The job\'s done and the bitch is dead.',
    'Yes...considerably.',
    'I have no armour left. You\'ve stripped it from me. Whatever is left of me - whatever is left of me - whatever I am - I\'m yours. ',
    'Vesper? I do hope you gave your parents hell for that.',
    'If I fail to report, 008 replaces me.',
    'Ejector seat? You\'re joking!',
    'You plan to break into the world\'s largest bank, but not to steal anything. Why?',
    'Nobody takes the time to do a real sinister interrogation. It\'s a lost art.',
    'Tell me, who\'s strangling the cat?',
    'They always said the pen was mightier than the sword.',
    'Trust? What a quaint idea.',
    'Half of everything is luck.',
    'James Bond, stiff-assed Brit...',
    'How long did you say the fuse was?',
    'Moneypenny, I\'m devastated.',
    'I\'m a little tied up!',
    'I\'m alone.',
]

# Hoisted edge-walk range (1..6); rebuilding it inside the stability scan
# measurably cost a little time (see header comment).
range17 = range(1, 7)

# Square categories as (x, y) coordinates; the board is indexed board[y][x].
# Standard Othello positional nomenclature: a/b/c squares on the edges,
# x-squares diagonally adjacent to corners, etc.
diag4 = [(1,5),(2,6),(1,2),(2,1),(5,1),(6,2),(5,6),(6,5)]
asquares = [(0,2),(2,0),(2,7),(0,5),(5,0),(7,2),(5,7),(7,5)]
bsquares = [(0,3),(3,0),(3,7),(0,4),(4,0),(7,3),(4,7),(7,4)]
csquares = [(0,1),(1,0),(1,7),(0,6),(6,0),(7,1),(6,7),(7,6)]
xsquares = [(1,1),(6,6),(6,1),(1,6)]
centers = [(3,3),(3,4),(4,3),(4,4)]
innercorners = [(2,5),(2,2),(5,5),(5,2)]
# NOTE(review): the original list contained (3,1) twice; by symmetry with the
# other three quadrants the duplicate should be (3,2).
othersquares = [(3,1),(4,1),(3,2),(4,2),(1,3),(1,4),(2,3),(2,4),
                (3,5),(3,6),(4,5),(4,6),(5,3),(5,4),(6,3),(6,4)]
corners = [(0,0),(7,0),(7,7),(0,7)]

# Vestigial sentinel from the ditched shallow-search-first ordering
# (header comment, strategy 5); f() still checks it and it is always None.
FIRST = None

# Positional weights: one table for the opening/midgame, one for the endgame.
boardValues = {'corner':25, 'x':-10, 'a':7, 'b':4, 'c':-3, 'diag4':-4,
               'center':-5, 'innercorner':2, 'other':1}
boardValuesEnd = {'corner':25, 'x':-10, 'a':10, 'b':7, 'c':-2, 'diag4':-3,
                  'center':-2, 'innercorner':4, 'other':1}
# Active table; DoubleAgent.getAction swaps in boardValuesEnd once fewer
# than 20 blank squares remain.
vals = boardValues


class GameAgent:
    """ Abstract GameAgent class. Your agents should inherit from this class
    and override the getAction method. """

    def getAction(self, gameState):
        """ Return a move in gameState.actions(), where gameState is an
        instance of a subclass of GameState. """
        abstract  # intentionally undefined name: raises NameError if not overridden


class UtilityDirectedGameAgent(GameAgent):
    """ A game agent that chooses the action with the highest utility """

    def __init__(self, searcher):
        # searcher provides getBestActionAndValue(state) -> (action, value)
        self.searcher = searcher

    def getAction(self, currentState):
        bestAction, value = self.searcher.getBestActionAndValue(currentState)
        #print "Got state value %s with best action %s" % (value, bestAction)
        return bestAction


class HumanGameAgent(GameAgent):
    """A game agent that asks the user what action to take. """

    def getAction(self, currentState):
        """ Print useful information, and then ask the user what action to
        take, re-prompting until the entered move is legal. """
        print("\n")
        if currentState.getPreviousMove():
            print("\nOpponent's move was %s" % str(currentState.getPreviousMove()))
        print(currentState)
        legalActions = currentState.actions()
        action = self._promptForMove()
        while action not in legalActions:
            print("That is not a legal move. Please enter one of %s" % legalActions)
            action = self._promptForMove()
        return action

    def _promptForMove(self):
        """Read one move from the user; any parse error yields None so the
        caller re-prompts.  KeyboardInterrupt/SystemExit still propagate."""
        try:
            # Python 2 input() evals the text, so "2,3" becomes a tuple.
            return input("\nYour move? ")
        except (KeyboardInterrupt, SystemExit):
            raise
        except:
            # deliberate best-effort: bad input just re-prompts
            return None


class GameTreeSearcher:
    """ Interface for classes that search the game tree exhaustively,
    returning a maximum-utility action and its value (or a bound thereupon) """

    def getBestActionAndValue(self, currentState):
        """ Return the best action and its value in currentState for
        currentState.currentPlayer() """
        abstract  # intentionally undefined name: raises NameError if not overridden


# this thing...doesn't work? in any case, it was much slower than alpha beta,
# leading me to believe mtd-f stands for 'man...took days finishing'. i
# probably did something wrong somewhere.
# doubleagent used to be mtdfagent before he got promoted
class MTDf(GameTreeSearcher):
    """MTD(f): repeated zero-window alpha-beta probes that converge on the
    minimax value.  f is the first guess for that value."""

    def getBestActionAndValue(self, currentState, f=0):
        g = f
        d = currentState.remainingDepth
        alphabeta = GraphAlphaBeta()
        upper = float("infinity")
        lower = float("-infinity")
        while lower < upper:
            # probe with a null window just above/below the current guess
            if g == lower:
                beta = g + 1
            else:
                beta = g
            # restore the depth consumed by the previous probe
            currentState.remainingDepth = d
            action, g = alphabeta.getBestActionAndValue(currentState, beta - 1, beta)
            if g < beta:
                upper = g   # probe failed low
            else:
                lower = g   # probe failed high
        return action, g


class DoubleAgent:
    """The tournament agent: a tiny opening book, then memoized ("graph")
    alpha-beta whose search depth adapts to how long recent moves took."""

    def __init__(self, searchDepth):
        print('Bond. James Bond.')
        self.heuristicEvalFn = evalBoard
        self.searchDepth = searchDepth
        self.numactions = 0                  # moves played so far by this agent
        self.searcher = GraphAlphaBetaMax()
        self.acctime = 0                     # total seconds spent thinking
        self.lastmove = 20                   # seconds spent on the previous move
        self.tempmax = 15                    # depth cap restored after a panic

    def getAction(self, currentState):
        stime = t()[0]
        self.numactions += 1
        prev = currentState.getPreviousMove()

        # --- a very very very mini opening book --------------------------
        if self.numactions == 1:
            if not prev:
                # no previous move: we are the first player
                self.opening = True
                return (2, 3)
            else:
                self.opening = False
        elif self.opening and self.numactions == 2:
            if prev == (2, 2):
                return (3, 2)
            elif prev == (4, 2):
                return (5, 3)
            else:
                self.opening = False
        elif self.opening and self.numactions == 3:
            if prev == (2, 4):
                return (3, 5)
            elif prev == (4, 2):
                return (3, 1)
            else:
                self.opening = False

        # --- adapt the search depth to the clock -------------------------
        if 30 > self.numactions > 2 and self.numactions != 20:
            if self.lastmove <= 10:
                self.searchDepth += 1
            elif 120 >= self.lastmove >= 40:
                self.searchDepth -= 1
            # panic!!
            elif self.lastmove > 120:
                self.tempmax = self.searchDepth
                self.searchDepth -= 2
            # clamp the adjusted depth to a sane window
            if self.searchDepth <= 0:
                self.searchDepth = 2
            elif self.searchDepth > self.tempmax and self.numactions < 20:
                self.searchDepth = self.tempmax

        # --- pick the weight table for this phase of the game ------------
        blanks = sum([row.count(' ') for row in currentState.board])
        global vals
        if blanks < 20:
            vals = boardValuesEnd
        else:
            vals = boardValues

        #print 'move %d: choosing between %d actions at depth %d' % (self.numactions, len(currentState.actions()), self.searchDepth)
        wrappedState = games.FixedDepthHeuristicGameStateAdapter(
            currentState, self.heuristicEvalFn, self.searchDepth)
        bestAction, value = self.searcher.getBestActionAndValue(wrappedState, True)

        etime = t()[0]
        self.acctime += (etime - stime)
        self.lastmove = (etime - stime)
        print(random.choice(quotes))
        print('> %d %d %f %f' % (self.numactions, self.searchDepth, self.lastmove, self.acctime))
        return bestAction


class AlphaBetaMax(GameTreeSearcher):
    """Negamax alpha-beta; at the root (first=True) successors are ordered
    by square quality via orderSuccessors."""

    def getBestActionAndValue(self, currentState, first=False, alpha=-1, beta=1):
        """ Return the best action and its value in currentState for
        currentState.currentPlayer() """
        # renamed local from 'util', which shadowed the imported util module
        utility = currentState.currentPlayerUtility()
        if utility is not None:
            return None, utility
        bestAction, bestValue = None, float("-infinity")
        successors = orderSuccessors(currentState) if first else currentState.successors()
        for action, successor in successors:
            nextValue = self.getBestActionAndValue(successor, False, -beta, -alpha)[1]
            if -nextValue > bestValue:
                bestAction, bestValue = action, -nextValue
            alpha = max(alpha, bestValue)
            if alpha >= beta:
                break  # Pruning!
        return bestAction, bestValue


class AlphaBeta(GameTreeSearcher):
    """Plain negamax alpha-beta in the window (alpha, beta)."""

    def getBestActionAndValue(self, currentState, alpha=-1, beta=1):
        """ Return the best action and its value in currentState for
        currentState.currentPlayer() """
        utility = currentState.currentPlayerUtility()
        if utility is not None:
            return None, utility
        bestAction, bestValue = None, float("-infinity")
        for action, successor in currentState.successors():
            nextValue = self.getBestActionAndValue(successor, -beta, -alpha)[1]
            if -nextValue > bestValue:
                bestAction, bestValue = action, -nextValue
            alpha = max(alpha, bestValue)
            if alpha >= beta:
                break  # Pruning!
        return bestAction, bestValue


class GraphAlphaBeta(AlphaBeta):
    """AlphaBeta with memoized search, so repeated positions (the game
    *graph* rather than tree) are evaluated once."""

    def __init__(self):
        self.getBestActionAndValue = util.memoize(self.getBestActionAndValue)


class GraphAlphaBetaMax(AlphaBetaMax):
    """AlphaBetaMax with memoized search (see GraphAlphaBeta)."""

    def __init__(self):
        self.getBestActionAndValue = util.memoize(self.getBestActionAndValue)


class FixedDepthHeuristicGameAgent(UtilityDirectedGameAgent):
    """A game agent that chooses the action with the max expected utility
    according to heiristicEvalFn, after looking ahead by searchDepth ply.
    You will create instances of this class to use your heuristic functions,
    but you don't need to understand how it works in detail.
    """

    def __init__(self, searcher, heuristicEvalFn, searchDepth):
        self.searcher = searcher
        self.heuristicEvalFn = heuristicEvalFn
        self.searchDepth = searchDepth

    def getAction(self, currentState):
        wrappedState = games.FixedDepthHeuristicGameStateAdapter(
            currentState, self.heuristicEvalFn, self.searchDepth)
        return UtilityDirectedGameAgent.getAction(self, wrappedState)


def edgeCornerSquareValue(size, x, y):
    """Score square (x, y) on a size*size board by how many edges it
    touches: interior -> 1, edge -> 4, corner -> 10.
    (Unreachable code after the return, which referenced an undefined
    'state', has been removed.)"""
    nEdges = 0
    if x == 0 or x == size - 1:
        nEdges += 1
    if y == 0 or y == size - 1:
        nEdges += 1
    return (1, 4, 10)[nEdges]


def _squareDifferential(board, currentPlayer, squares):
    """(current player's discs - opponent's discs) over the given squares."""
    mine, theirs = 0, 0
    for x, y in squares:
        cell = board[y][x]
        if cell != ' ':
            if cell == currentPlayer:
                mine += 1
            else:
                theirs += 1
    return mine - theirs


def _edgeRun(board, owner, cells):
    """Length of the consecutive run of `owner`'s discs along `cells`,
    walked in order and stopping at the first square not owned by `owner`."""
    run = 0
    for x, y in cells:
        if board[y][x] == owner:
            run += 1
        else:
            break
    return run


def evalBoard(state):
    """Heuristic board evaluation for state.currentPlayer(), scaled by
    500.0 to stay inside alpha-beta's default (-1, 1) window.

    Combines weighted per-square-category differentials (weights come from
    the module-level `vals` table), an approximate stable-disc count
    (owned corners plus runs of same-colour discs connected to them along
    the edges), and the current player's mobility.

    Bug fixes vs the original: the stability gate compared coordinate
    tuples with an int (`corners[0] > 0`); the `not board[y][x]` guards
    could never fire for one-character cells (now `== ' '`); and `is`
    comparisons on characters became `==`.  A local named `as` (a reserved
    keyword since Python 2.6) was eliminated.
    """
    board = state.board
    currentPlayer = state.currentPlayer()
    maxvalue = 500.0

    value = 0
    value += _squareDifferential(board, currentPlayer, asquares) * vals['a']
    value += _squareDifferential(board, currentPlayer, bsquares) * vals['b']
    value += _squareDifferential(board, currentPlayer, csquares) * vals['c']
    value += _squareDifferential(board, currentPlayer, xsquares) * vals['x']
    value += _squareDifferential(board, currentPlayer, diag4) * vals['diag4']
    value += _squareDifferential(board, currentPlayer, corners) * vals['corner']
    value += _squareDifferential(board, currentPlayer, centers) * vals['center']
    value += _squareDifferential(board, currentPlayer, innercorners) * vals['innercorner']
    value += _squareDifferential(board, currentPlayer, othersquares) * vals['other']

    # --- approximate stability ------------------------------------------
    pstability = 0   # stable squares owned by the current player
    ostability = 0   # stable squares owned by the opponent
    if any(board[y][x] != ' ' for x, y in corners):
        # Each entry: corner (x, y) plus its two edge walks away from the
        # corner.  A walk carries an optional guard corner: when the guard
        # corner is occupied, its own walks already cover the shared edge,
        # so the guarded walk is skipped to avoid double counting (this
        # asymmetry — only (7,0) and (0,7) are guarded — matches the
        # original code).
        cornerWalks = [
            ((0, 0), [(None, [(0, i) for i in range17]),
                      (None, [(i, 0) for i in range17])]),
            ((7, 7), [(None, [(7, 7 - i) for i in range17]),
                      (None, [(7 - i, 7) for i in range17])]),
            ((7, 0), [((0, 0), [(7 - i, 0) for i in range17]),
                      ((7, 7), [(7, i) for i in range17])]),
            ((0, 7), [((0, 0), [(0, 7 - i) for i in range17]),
                      ((7, 7), [(i, 7) for i in range17])]),
        ]
        for (cx, cy), walks in cornerWalks:
            owner = board[cy][cx]
            if owner == ' ':
                continue
            stability = 1  # the corner itself is always stable
            for guard, walk in walks:
                if guard is None or board[guard[1]][guard[0]] == ' ':
                    stability += _edgeRun(board, owner, walk)
            if owner == currentPlayer:
                pstability += stability
            else:
                ostability += stability
    value += (pstability - ostability) * 5

    # mobility: only our own move count is known at this node (see header
    # comment, strategy 4c — a true differential isn't available)
    value += len(state.actions()) * 6

    return value / maxvalue


def f(s):
    """Sort key for an (action, successorState) pair: a category base
    offset (corner=0, a=1, b=2, other=3, x=4; the ditched FIRST sentinel
    sorts before everything) plus the number of replies available in the
    successor, so the ordering mixes square quality with opponent mobility."""
    if s[0] is FIRST:
        return -1
    elif s[0] in corners:
        return 0 + len(s[1].actions())
    elif s[0] in asquares:
        return 1 + len(s[1].actions())
    elif s[0] in bsquares:
        return 2 + len(s[1].actions())
    elif s[0] in xsquares:
        return 4 + len(s[1].actions())
    else:
        return 3 + len(s[1].actions())


def orderSuccessors(state):
    """Return state.successors() sorted by f (best-looking moves first)."""
    states = state.successors()
    states.sort(key=f)
    return states


def getOthelloAgent():
    """Factory used by the tournament harness."""
    return DoubleAgent(6)