AI自动化利用OCR图片文本识别做自动化 — APP篇

概述

结合 OCR 图片文本识别与 Uiautomator2 的图片点击方法，实现 APP UI 自动化。

"""
@author: Kahoku
@date: 2024/01
@function: 结合OCR文字识别AI模型 与Uiautomator2的图片点击方法实现绕过定位元素, 通过文字、图片做APP UI自动化。
    |- OCR模型: CnOcr;  pip install cnocr onnxruntime
    |- Uiautomator2 图片点击环境安装如下
        |- pip install uiautomator2
        |- pip3 install -U "uiautomator2[image]" -i https://pypi.doubanio.com/simple
    |- Uiautomator2 图片核心功能
        |- device = u2.connect(device_name)
        |- 获取图片的坐标位置与匹配的相似度: img_dict = device.image.match(image) 
        |- 点击图片: device.image.click(image, timeout=5.0)
"""

import cv2
import uiautomator2 as u2
from cnocr import CnOcr
from difflib import SequenceMatcher


''' 暂时只支持英文的OCR识别, 中文的OCR识别不稳定 '''
class UiautomatorOCR:
    """Drive an Android app UI through OCR text lookup and image matching.

    Text is located on a device screenshot with a CnOcr English model and
    tapped by coordinate; icons are tapped through the uiautomator2 image
    plugin. Only English recognition is wired up for now; Chinese
    recognition was left disabled because its results were unstable.
    """

    def __init__(self, device_name=None) -> None:
        """Connect to the device and load the English OCR model.

        Args:
            device_name: Value forwarded unchanged to ``u2.connect``.
        """
        self.device = u2.connect(device_name)
        # English detection + recognition models, run on the CPU.
        # (The original author noted context='cuda:0' as the alternative.)
        self.govee_ocr_en = CnOcr(
            det_model_name='en_PP-OCRv3_det',
            rec_model_name='en_PP-OCRv3',
            context='cpu',
        )
        # NOTE: a Chinese recognition model (rec_model_name='ch_PP-OCRv3')
        # is intentionally not loaded; see the class docstring.

    def text_similarity(self, a, b):
        """Return the ``difflib.SequenceMatcher`` ratio between *a* and *b*."""
        return SequenceMatcher(None, a, b).ratio()

    def show_phone_screen(self, screen, width, height):
        """Show *screen* in an OpenCV window named ``'Image'``.

        Args:
            screen: Image passed to ``cv2.imshow``.
            width: Window width passed to ``cv2.resizeWindow``.
            height: Window height passed to ``cv2.resizeWindow``.
        """
        cv2.namedWindow('Image', cv2.WINDOW_NORMAL)
        # Resize the window before drawing into it.
        cv2.resizeWindow('Image', width, height)
        cv2.imshow('Image', screen)
        # Poll the keyboard for 1 ms; key code 27 is ESC.
        if (cv2.waitKey(1) & 0xFF) == 27:
            cv2.destroyAllWindows()

    def _find_text_center(self, text, image):
        """Return the ``(x, y)`` centre of the OCR entry whose text equals *text*.

        Raises:
            LookupError: If no OCR entry on *image* equals *text*.
        """
        matches = [
            entry for entry in self.govee_ocr_en.ocr(image)
            if entry["text"] == text
        ]
        if not matches:
            # Without this guard the coordinates would be unbound and the
            # caller would get an opaque UnboundLocalError instead.
            raise LookupError(
                f"text {text!r} was not found on the screen by OCR"
            )

        # When the text occurs several times, the last entry in OCR result
        # order is used.
        corners = matches[-1]["position"]
        # Corner 0 and corner 2 are used as opposite corners of the box;
        # their midpoint is the tap target.
        left, top = int(corners[0][0]), int(corners[0][1])
        right, bottom = int(corners[2][0]), int(corners[2][1])
        return (left + right) / 2, (top + bottom) / 2

    def text_click(self, text):
        """Tap the centre of the on-screen text that exactly equals *text*.

        A fresh screenshot is run through the English OCR model. If several
        recognised entries equal *text*, the last one in OCR result order is
        tapped.

        Args:
            text: Exact string to look for among the OCR results.

        Raises:
            LookupError: If no recognised entry equals *text*.
        """
        image = self.get_phone_screenshot()
        x, y = self._find_text_center(text, image)
        self.device.click(x, y)

    def image_click(self, image):
        """Tap an icon via the uiautomator2 image plugin.

        Args:
            image: Template image forwarded to ``device.image.click`` together
                with ``timeout=5.0``.
        """
        self.device.image.click(image, timeout=5.0)

    def get_phone_screenshot(self):
        """Return a device screenshot requested with ``format='opencv'``."""
        return self.device.screenshot(format='opencv')

if __name__ == "__main__":
    # Demo: tap a piece of text that is currently visible on the phone screen.
    target_text = "Hello"  # text expected to be shown on the device

    # No device name is given, so u2.connect receives None.
    automation = UiautomatorOCR()
    automation.text_click(target_text)