class RL implements Player { private long played,won; private double[][] q = new double[10000][6]; private int[][] e = new int[25][2]; private int[] eN = new int[2]; private int a, box; private double ALPHA,GAMMA; public void reset() { played = won = 0; } private int getBox(State s) { int temp = s.currentNum; for(int i = 0; i < s.cards.length; i++) { temp *=2; if (s.cards[i]>0) temp++; } return temp; } /* private void reward( int rwd, State s ) { double rhat; int cBox = getBox(s); int best = 0; for (int i = 0; i < 6; i++) if (q[cBox,best] < q[cBox,i]) best = i; for(int i = 0; i < eN[0]; i++) if (q[e[i][0]/10][e[i][0]%10] < 20000000000) q[e[i][0]/10][e[i][0]%10] += ALPHA*((i+1)/eN[0])*rhat; }*/ private void sarsa(int traceToUse, int s, int a, int sPrime, int aPrime, int r ) { double rhat = r + GAMMA*q[sPrime][aPrime] - q[s][a]; for( int i = 0; i < eN[traceToUse]; i++) if (Math.abs(q[e[i][traceToUse]/10][e[i][traceToUse]%10]) < 2147483640 ) q[e[i][traceToUse]/10][e[i][traceToUse]%10] += ALPHA*((i+1)/eN[traceToUse])*rhat; } public long gamesPlayed() { return played; } public long gamesWon() { return won; } public int makeMove(State state) { a = 0; box = getBox(state); for (int i = 0; i < 6; i++) if ( q[box][a] < q[box][i] ) a = i; if ( (int)(Math.random()*300) == 0 ) a = (int)(Math.random()*6); while( state.cards[a] < 1 ) { a = (int)(Math.random()*6); } e[eN[0]++][0] = box*10 + a; return a; } public void onYourMove(State state) { int sPrime = getBox(state); int s = box; //no change required to a, it was the last action you picked int aPrime = 0; for (int i = 0; i < 6; i++) if (q[sPrime][aPrime] < q[sPrime][i]) aPrime = i; int r = 0; if (this == state.loser) r = -1; sarsa(0, s, a, sPrime, aPrime, r ); } public void onStart(State state) { a = -1; box = getBox(state); eN[0] = 0; eN[1] = 0; ALPHA = 0.001; GAMMA = 0.9; } public void onEnd(State state) { State s = state.revertedCopy(); if (!(this == state.loser)) { won++; int sPrime = getBox(s); //the opponent's starting position is your ending one sarsa(0, box, a, sPrime, 0, 1); } else { int sPrime = getBox(state); int sCurr = getBox(s); sarsa(1, box, a, sPrime, 0, -1); } played++; } public void onOpponentMove(State state) { State prev = state.revertedCopy(); int sPrime = getBox(state); int a = state.lastPlay; int s = getBox(prev); e[ eN[1]++ ][1] = s*10+a; int aPrime = 0; for(int i =0; i <6; i++) if (q[sPrime][aPrime]