gpt4 book ai didi

Python/OpenCV - 不检测网格

转载 作者:太空宇宙 更新时间:2023-11-03 22:29:39 26 4
gpt4 key购买 nike

以下脚本来自http://projectproto.blogspot.co.uk/2014/07/opencv-python-2048-game-solver.html

import cv2
import numpy as np
import win32api, win32gui, win32ui, win32con, win32com.client
from PIL import Image, ImageFont, ImageDraw, ImageOps

# create training model based on the given TTF font file
# http://projectproto.blogspot.com/2014/07/opencv-python-digit-recognition.html
def createDigitsModel(fontfile, digitheight):
font = ImageFont.truetype(fontfile, digitheight)
samples = np.empty((0,digitheight*(digitheight/2)))
responses = []
for n in range(10):
pil_im = Image.new("RGB", (digitheight, digitheight*2))
ImageDraw.Draw(pil_im).text((0, 0), str(n), font=font)
pil_im = pil_im.crop(pil_im.getbbox())
pil_im = ImageOps.invert(pil_im)
#pil_im.save(str(n) + ".png")

# convert to cv image
cv_image = cv2.cvtColor(np.array( pil_im ), cv2.COLOR_RGBA2BGRA)
gray = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray,(5,5),0)
thresh = cv2.adaptiveThreshold(blur,255,1,1,11,2)

roi = cv2.resize(thresh,(digitheight,digitheight/2))
responses.append( n )
sample = roi.reshape((1,digitheight*(digitheight/2)))
samples = np.append(samples,sample,0)

samples = np.array(samples,np.float32)
responses = np.array(responses,np.float32)

model = cv2.KNearest()
model.train(samples,responses)
return model

class Board(object):
UP, DOWN, LEFT, RIGHT = 1, 2, 3, 4
FONT = "font/ClearSans-Bold.ttf"
def __init__(self, clientwindowtitle):
self.hwnd = self.getClientWindow(clientwindowtitle)
if not self.hwnd:
return
self.hwndDC = win32gui.GetWindowDC(self.hwnd)
self.mfcDC = win32ui.CreateDCFromHandle(self.hwndDC)
self.saveDC = self.mfcDC.CreateCompatibleDC()

self.cl, self.ct, right, bot = win32gui.GetClientRect(self.hwnd)
self.cw, self.ch = right-self.cl, bot-self.ct
self.cl += win32api.GetSystemMetrics(win32con.SM_CXSIZEFRAME)
self.ct += win32api.GetSystemMetrics(win32con.SM_CYSIZEFRAME)
self.ct += win32api.GetSystemMetrics(win32con.SM_CYCAPTION)
self.ch += win32api.GetSystemMetrics(win32con.SM_CYSIZEFRAME)*2

self.saveBitMap = win32ui.CreateBitmap()
self.saveBitMap.CreateCompatibleBitmap(self.mfcDC, self.cw, self.ch)
self.saveDC.SelectObject(self.saveBitMap)

self.tiles, self.tileheight, self.contour = self.findTiles(self.getClientFrame())
if not len(self.tiles):
return
self.digitheight = self.tileheight / 2
self.digitsmodel = createDigitsModel(self.FONT, self.digitheight)

self.update()

def getClientWindow(self, windowtitle):
toplist, winlist = [], []
def enum_cb(hwnd, results):
winlist.append((hwnd, win32gui.GetWindowText(hwnd)))
win32gui.EnumWindows(enum_cb, toplist)
window = [(hwnd, title) for hwnd, title in winlist if windowtitle.lower() in title.lower()]
if not len(window):
return 0
return window[0][0]

def getClientFrame(self):
self.saveDC.BitBlt((0, 0), (self.cw, self.ch),
self.mfcDC, (self.cl, self.ct), win32con.SRCCOPY)

bmpinfo = self.saveBitMap.GetInfo()
bmpstr = self.saveBitMap.GetBitmapBits(True)

pil_img = Image.frombuffer( 'RGB',
(bmpinfo['bmWidth'], bmpinfo['bmHeight']),
bmpstr, 'raw', 'BGRX', 0, 1)

array = np.array( pil_img )
cvimage = cv2.cvtColor(array, cv2.COLOR_RGBA2BGRA)
return cvimage

def findTiles(self, cvframe):
tiles, avgh = [], 0

gray = cv2.cvtColor(cvframe,cv2.COLOR_BGRA2GRAY)
thresh = cv2.adaptiveThreshold(gray,255,1,1,11,2)
contours, hierarchy = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

def findBoard(contours): # get largest square
ww, sqcnt = 10, None
for cnt in contours:
x,y,w,h = cv2.boundingRect(cnt)
if w>ww and abs(w-h)<w/10:
ww = w
sqcnt = cnt
return sqcnt

board = findBoard(contours)
if board==None:
print 'board not found!'
return tiles, avgh, board

bx,by,bw,bh = cv2.boundingRect(board)
#cv2.rectangle(cvframe,(bx,by),(bx+bw,by+bh),(0,255,0),2)
#cv2.imshow('board',cvframe)
#cv2.waitKey(0)
#cv2.destroyWindow( 'board' )
maxh = bh/4
minh = (maxh*4)/5
count = 0
for contour in contours:
x,y,w,h = cv2.boundingRect(contour)
if y>by and w>minh and w<maxh and h>minh and h<maxh:
avgh += h
count += 1
if not count:
print 'no tile found!'
return tiles, avgh, board

avgh = avgh / count
margin = (bh-avgh*4)/5
for row in range(4):
for col in range(4):
x0 = bx + avgh*col + margin*(col+1)
x1 = x0 + avgh
y0 = by + avgh*row + margin*(row+1)
y1 = y0 + avgh
tiles.append([x0, y0, x1, y1])
#cv2.rectangle(cvframe,(x0,y0),(x1,y1),(0,255,0),2)
#cv2.imshow('tiles',cvframe)
#cv2.waitKey(0)
#cv2.destroyWindow( 'tiles' )
return tiles, avgh, board

def getTileThreshold(self, tileimage):
gray = cv2.cvtColor(tileimage,cv2.COLOR_BGR2GRAY)
row, col = gray.shape
tmp = gray.copy().reshape(1, row*col)
counts = np.bincount(tmp[0])
sort = np.sort(counts)

modes, freqs = [], []
for i in range(len(sort)):
freq = sort[-1-i]
if freq < 4:
break
mode = np.where(counts==freq)[0][0]
modes.append(mode)
freqs.append(freq)

bg, fg = modes[0], modes[0]
for i in range(len(modes)):
fg = modes[i]
#if abs(bg-fg)>=48:
if abs(bg-fg)>32 and abs(fg-150)>4: # 150?!
break
#print bg, fg
if bg>fg: # needs dark background ?
tmp = 255 - tmp
bg, fg = 255-bg, 255-fg

tmp = tmp.reshape(row, col)
ret, thresh = cv2.threshold(tmp,(bg+fg)/2,255,cv2.THRESH_BINARY)
return thresh

def getTileNumbers(self, cvframe):
numbers = []
outframe = np.zeros(cvframe.shape,np.uint8)
def guessNumber(digits):
for i in range(1,16):
nn = 2**i
ss = str(nn)
dd = [int(c) for c in ss]
if set(digits) == set(dd):
return nn
return 0

for tile in self.tiles:
x0,y0,x1,y1 = tile
tileimage = cvframe[y0:y1,x0:x1]
cv2.rectangle(cvframe,(x0,y0),(x1,y1),(0,255,0),2)
cv2.rectangle(outframe,(x0,y0),(x1,y1),(0,255,0),1)

thresh = self.getTileThreshold(tileimage)
contours,hierarchy = cv2.findContours(thresh.copy(),cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)

dh = self.digitheight
digits = []
for cnt in contours:
x,y,w,h = cv2.boundingRect(cnt)
if h>w and h>(dh*1)/5 and h<(dh*6)/5:
cv2.rectangle(cvframe,(x0+x,y0+y),(x0+x+w,y0+y+h),(0,0,255),1)
roi = thresh[y:y+h,x:x+w]
roi = cv2.resize(roi,(dh,dh/2))
roi = roi.reshape((1,dh*(dh/2)))
roi = np.float32(roi)
retval, results, neigh_resp, dists = self.digitsmodel.find_nearest(roi, k=1)
digit = int((results[0][0]))
string = str(digit)
digits.append(digit)
cv2.putText(outframe,string,(x0+x,y0+y+h),0,float(h)/24,(0,255,0))

numbers.append(guessNumber(digits))
return numbers, outframe

def getWindowHandle(self):
return self.hwnd

def getBoardContour(self):
return self.contour

def update(self):
frame = self.getClientFrame()
self.tilenumbers, outframe = self.getTileNumbers(frame)
return self.tilenumbers, frame, outframe

def copyTileNumbers(self):
return self.tilenumbers[:]

def getCell(self, tiles, x, y):
return tiles[(y*4)+x]

def setCell(self, tiles, x, y, v):
tiles[(y*4)+x] = v
return tiles

def getCol(self, tiles, x):
return [self.getCell(tiles, x, i) for i in range(4)]

def setCol(self, tiles, x, col):
for i in range(4):
self.setCell(tiles, x, i, col[i])
return tiles

def getLine(self, tiles, y):
return [self.getCell(tiles, i, y) for i in range(4)]

def setLine(self, tiles, y, line):
for i in range(4):
self.setCell(tiles, i, y, line[i])
return tiles

def validMove(self, tilenumbers, direction):
if direction == self.UP or direction == self.DOWN:
for x in range(4):
col = self.getCol(tilenumbers, x)
for y in range(4):
if(y < 4-1 and col[y] == col[y+1] and col[y]!=0):
return True
if(direction == self.DOWN and y > 0 and col[y] == 0 and col[y-1]!=0):
return True
if(direction == self.UP and y < 4-1 and col[y] == 0 and col[y+1]!=0):
return True
if direction == self.LEFT or direction == self.RIGHT:
for y in range(4):
line = self.getLine(tilenumbers, y)
for x in range(4):
if(x < 4-1 and line[x] == line[x+1] and line[x]!=0):
return True
if(direction == self.RIGHT and x > 0 and line[x] == 0 and line[x-1]!=0):
return True
if(direction == self.LEFT and x < 4-1 and line[x] == 0 and line[x+1]!=0):
return True
return False

def moveTileNumbers(self, tilenumbers, direction):
def collapseline(line, direction):
if (direction==self.LEFT or direction==self.UP):
inc = 1
rg = xrange(0, 4-1, inc)
else:
inc = -1
rg = xrange(4-1, 0, inc)
pts = 0
for i in rg:
if line[i] == 0:
continue
if line[i] == line[i+inc]:
v = line[i]*2
line[i] = v
line[i+inc] = 0
pts += v
return line, pts
def moveline(line, directsion):
nl = [c for c in line if c != 0]
if directsion==self.UP or directsion==self.LEFT:
return nl + [0] * (4 - len(nl))
return [0] * (4 - len(nl)) + nl

score = 0
if direction==self.LEFT or direction==self.RIGHT:
for i in range(4):
origin = self.getLine(tilenumbers, i)
line = moveline(origin, direction)
collapsed, pts = collapseline(line, direction)
new = moveline(collapsed, direction)
tilenumbers = self.setLine(tilenumbers, i, new)
score += pts
elif direction==self.UP or direction==self.DOWN:
for i in range(4):
origin = self.getCol(tilenumbers, i)
line = moveline(origin, direction)
collapsed, pts = collapseline(line, direction)
new = moveline(collapsed, direction)
tilenumbers = self.setCol(tilenumbers, i, new)
score += pts

return score, tilenumbers

# AI based on "term2048-AI"
# https://github.com/Nicola17/term2048-AI
class AI(object):
def __init__(self, board):
self.board = board

def nextMove(self):
tilenumbers = self.board.copyTileNumbers()
m, s = self.nextMoveRecur(tilenumbers[:],3,3)
return m

def nextMoveRecur(self, tilenumbers, depth, maxDepth, base=0.9):
bestMove, bestScore = 0, -1
for m in range(1,5):
if(self.board.validMove(tilenumbers, m)):
score, newtiles = self.board.moveTileNumbers(tilenumbers[:], m)
score, critical = self.evaluate(newtiles)
newtiles = self.board.setCell(newtiles,critical[0],critical[1],2)
if depth != 0:
my_m,my_s = self.nextMoveRecur(newtiles[:],depth-1,maxDepth)
score += my_s*pow(base,maxDepth-depth+1)
if(score > bestScore):
bestMove = m
bestScore = score

return bestMove, bestScore

def evaluate(self, tilenumbers, commonRatio=0.25):

maxVal = 0.
criticalTile = (-1, -1)

for i in range(8):
linearWeightedVal = 0
invert = False if i<4 else True
weight = 1.
ctile = (-1,-1)

cond = i%4
for y in range(4):
for x in range(4):
if cond==0:
b_x = 4-1-x if invert else x
b_y = y
elif cond==1:
b_x = x
b_y = 4-1-y if invert else y
elif cond==2:
b_x = 4-1-x if invert else x
b_y = 4-1-y
elif cond==3:
b_x = 4-1-x
b_y = 4-1-y if invert else y

currVal=self.board.getCell(tilenumbers,b_x,b_y)
if(currVal == 0 and ctile == (-1,-1)):
ctile = (b_x,b_y)
linearWeightedVal += currVal*weight
weight *= commonRatio
invert = not invert

if linearWeightedVal > maxVal:
maxVal = linearWeightedVal
criticalTile = ctile

return maxVal, criticalTile

def solveBoard(self, moveinterval=500):
boardHWND = self.board.getWindowHandle()
if not boardHWND:
return False
bx, by, bw, bh = cv2.boundingRect(self.board.getBoardContour())
x0, x1, y0, y1 = bx, bx+bw, by, by+bh

win32gui.SetForegroundWindow(boardHWND)
shell = win32com.client.Dispatch('WScript.Shell')
print 'Set the focus to the Game Window, and the press this arrow key:'
keymove = ['UP', 'DOWN', 'LEFT', 'RIGHT']

delay = moveinterval / 3 # milliseconds delay to cancel board animation effect
prev_numbers = []
while True:
numbers, inframe, outframe = self.board.update()
if numbers != prev_numbers:
cv2.waitKey(delay)
numbers, inframe, outframe = self.board.update()
if numbers == prev_numbers: # recheck if has changed
continue
prev_numbers = numbers
move = ai.nextMove()
if move:
key = keymove[move-1]
shell.SendKeys('{%s}'%key)
print key
cv2.waitKey(delay)
cv2.imshow('CV copy',inframe[y0:y1,x0:x1])
cv2.imshow('CV out', outframe[y0:y1,x0:x1])
cv2.waitKey(delay)
cv2.destroyWindow( 'CV copy' )
cv2.destroyWindow( 'CV out' )


# http://gabrielecirulli.github.io/2048/
# http://ov3y.github.io/2048-AI/
board = Board("2048 - Google Chrome")
#board = Board("2048 - Mozilla Firefox")

ai = AI(board)
ai.solveBoard(360)

print 'stopped.'

我用示例 URL 打开 Google Chrome http://ov3y.github.io/2048-AI/打开,运行脚本出现如下错误:

20.py:109: FutureWarning: comparison to `None` will result in an elementwise object comparison in the future.
if board==None:
no tile found!
Set the focus to the Game Window, and the press this arrow key:

然后什么也没有,它只是坐在那里。所以我最关心的部分是 no tile found!。取消注释行:

#cv2.rectangle(cvframe,(bx,by),(bx+bw,by+bh),(0,255,0),2)
#cv2.imshow('board',cvframe)
#cv2.waitKey(0)
#cv2.destroyWindow( 'board' )

在屏幕上显示以下窗口:

enter image description here

谁能解释为什么 OpenCV 无法检测到网格,或者如何进行调试?

最佳答案

很可能这不是检测网格的问题,而是捕获浏览器窗口的问题 - 您试图在空图像上查找网格,这当然会失败。首先确保您已正确抓取 firefox/chrome/opera 屏幕窗口 - 在函数 getClientFrame(self) 中放置此代码:

cv2.imshow('browser window', cvimage)
cv2.waitKey(10000)

就在最后的 return cvimage 之前。它应该向您显示浏览器窗口 10 秒钟。如果不是,那么它将 100% 确定问题出在捕获浏览器窗口,而不是检测网格。要检查捕获浏览器窗口有什么问题,请使用 win32api.GetLastError() 函数(您可以检查错误代码 here )。

当然有可能我错了,这是检测网格的问题 - 如果是这样,请提供示例图像(只需保存我提供的代码显示的图像)以便我们进行测试。

\\编辑:
我刚刚注意到你帖子的第二部分 - 所以很可能我错了,但你仍然可以测试它。看起来您正在捕获一个 chrome 窗口和其他窗口的一部分 - 尝试让您的浏览器窗口全屏显示。

\\edit2:

仔细查看您的图像后,我发现了一件奇怪的事情 - 捕获的图像有垂直线并且宽度(右侧没有重复部分)小于原始窗口(但高度似乎没问题)。宽度似乎是原始宽度的 75%,所以我猜想 PIL 将每 4 个字节视为一个像素,但每个像素应该只使用 3 个字节。很难对其进行测试,因为在我的系统(win 8.1 64 位)上它运行良好。可能的解决方案(我无法测试它们,所以您需要检查哪一个可以工作..抱歉 :) ):

  1. 尝试改变这一行:

        pil_img = Image.frombuffer( 'RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGRX', 0, 1)

    像这样:pil_img = Image.frombuffer( 'RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGR', 0, 1)通常,您需要将第五个参数的值从 BGRX 更改为其他值 - 最有可能更改为“BGR”,完整的选项列表为 here .如果它不起作用,请尝试使用不同的第一个和第五个参数值。

  2. 在屏幕截图上,您似乎使用了一些相当旧的 Windows 版本,或者至少您使用的是旧的图形用户界面(顺便说一句,这很棒!)。如果 - 除了将 gui 样式设置为“旧样式”之外 - 你已经将(或 Windows 已经为你完成)你的颜色质量设置为“最高(32 位)”以外的其他东西,它也可能会导致你的问题。尝试将其设置为“最高(32 位)”。需要明确的是——我说的是这个窗口中的设置:
    enter image description here
    (在右侧,靠近底部和调色板)。
  3. 如果您有 2 个(或更多)屏幕,请在只使用一个屏幕的情况下测试您的程序。此外,如果您正在使用一些替代窗口管理器(或其他一些奇怪的扩展程序,例如用于多个桌面的东西),请将其关闭并重试。

关于Python/OpenCV - 不检测网格,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/28880784/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com