gpt4 book ai didi

ios - 将 Vision VNTextObservation 转换为字符串

转载 作者:IT王子 更新时间:2023-10-29 07:36:16 26 4
gpt4 key购买 nike

我正在查看 Apple 的 Vision API 文档,发现了几个用于在 UIImage 中检测文本的类:

1) class VNDetectTextRectanglesRequest

2) class VNTextObservation

看起来它们可以检测字符,但我没有找到对检测到的字符做进一步处理的方法。一旦检测到字符,该如何把它们转换成 NSLinguisticTagger 可以解析的内容?

这里有一篇简要介绍 Vision 的帖子。

感谢阅读。

最佳答案

这是怎么做的...

    //
// ViewController.swift
//


import UIKit
import Vision
import CoreML

/// Sample view controller that turns the text found in an image into strings.
///
/// The pipeline has two stages:
/// 1. `textDetectionRequest` asks Vision for text regions ("words") and the
///    bounding box of every character inside each region.
/// 2. Every character box is cropped out of the input image, rectified, and
///    fed to `ocrRequest`, a Core ML classifier that names the character.
///
/// The recognized characters of one region are accumulated in
/// `recognizedRegion`; the finished regions are collected in `recognizedWords`.
class ViewController: UIViewController {

    // MARK: - State

    /// The image currently being processed. `handleDetection` uses its extent
    /// to convert Vision's normalized coordinates into image coordinates.
    var inputImage: CIImage?

    /// Result of the overall recognition: one string per detected text region.
    var recognizedWords: [String] = []

    /// Characters recognized so far for the region currently being processed.
    var recognizedRegion: String = ""

    // MARK: - Requests

    /// Core ML backed request that classifies a single character image.
    ///
    /// According to the original author, the bundled `OCR` model was trained for
    /// the font "Inconsolata" (digits 0...9 and upper-case characters A...Z).
    lazy var ocrRequest: VNCoreMLRequest = {
        do {
            let model = try VNCoreMLModel(for: OCR().model)
            // Capture `self` weakly: the request is stored in a property of `self`,
            // so a strong capture in its completion handler would be a retain cycle.
            return VNCoreMLRequest(model: model) { [weak self] request, error in
                self?.handleClassification(request: request, error: error)
            }
        } catch {
            // A bundled model that cannot be loaded is a packaging error.
            fatalError("cannot load model: \(error)")
        }
    }()

    /// Vision request that detects text regions in the input image.
    lazy var textDetectionRequest: VNDetectTextRectanglesRequest = {
        // Weak capture for the same reason as in `ocrRequest`.
        return VNDetectTextRectanglesRequest { [weak self] request, error in
            self?.handleDetection(request: request, error: error)
        }
    }()

    // MARK: - Request handlers

    /// Completion handler of `ocrRequest`.
    ///
    /// Appends the identifier of the first classification result to
    /// `recognizedRegion`. A failed or empty classification is logged and
    /// skipped instead of terminating the app.
    ///
    /// - Parameters:
    ///   - request: The finished classification request.
    ///   - error: The error reported by Vision, if any.
    func handleClassification(request: VNRequest, error: Error?) {
        guard let observations = request.results as? [VNClassificationObservation] else {
            print("unexpected result: \(String(describing: error))")
            return
        }
        guard let best = observations.first else {
            print("cant get best result")
            return
        }

        recognizedRegion += best.identifier
    }

    /// Completion handler of `textDetectionRequest`.
    ///
    /// Rebuilds `recognizedWords` by running `ocrRequest` on every character
    /// box of every detected region, then hands a snapshot of the result to
    /// `PrintWords(words:)` on the main queue.
    ///
    /// - Parameters:
    ///   - request: The finished text detection request.
    ///   - error: The error reported by Vision, if any.
    func handleDetection(request: VNRequest, error: Error?) {
        guard let observations = request.results as? [VNTextObservation] else {
            print("unexpected result: \(String(describing: error))")
            return
        }
        guard let image = inputImage else {
            print("text detection finished but no input image is set")
            return
        }

        // Start from an empty result.
        recognizedWords = []

        // Vision reports coordinates normalized to 0...1; scale them up to the
        // dimensions of the input image.
        let extent = image.extent
        let transform = CGAffineTransform.identity
            .scaledBy(x: extent.size.width, y: extent.size.height)

        // Loop-invariant option: let Vision stretch each character image to the
        // input size of the model.
        ocrRequest.imageCropAndScaleOption = .scaleFill

        // A region is like a "word".
        for region in observations {
            guard let characterBoxes = region.characterBoxes else {
                continue
            }

            // Start a new word.
            recognizedRegion = ""

            for box in characterBoxes {
                // Scale the bounding box to image coordinates.
                let realBoundingBox = box.boundingBox.applying(transform)

                // Skip only this character if its box lies outside the image;
                // the remaining characters and regions are still processed.
                guard extent.contains(realBoundingBox) else {
                    print("invalid detected rectangle")
                    continue
                }

                // Scale the corner points to image coordinates.
                let topLeft = box.topLeft.applying(transform)
                let topRight = box.topRight.applying(transform)
                let bottomLeft = box.bottomLeft.applying(transform)
                let bottomRight = box.bottomRight.applying(transform)

                // Crop the character and rectify it.
                let charImage = image
                    .cropped(to: realBoundingBox)
                    .applyingFilter("CIPerspectiveCorrection", parameters: [
                        "inputTopLeft": CIVector(cgPoint: topLeft),
                        "inputTopRight": CIVector(cgPoint: topRight),
                        "inputBottomLeft": CIVector(cgPoint: bottomLeft),
                        "inputBottomRight": CIVector(cgPoint: bottomRight)
                    ])

                // `perform` runs synchronously, so `handleClassification` has
                // already appended to `recognizedRegion` when it returns.
                // No manual scaling is needed; Vision does it for us.
                let handler = VNImageRequestHandler(ciImage: charImage, options: [:])
                do {
                    try handler.perform([ocrRequest])
                } catch {
                    print("Error: \(error)")
                }
            }

            // Store the recognized characters of this region.
            recognizedWords.append(recognizedRegion)
        }

        // Hand a snapshot to the main queue so the printed value cannot change
        // underneath it.
        let words = recognizedWords
        DispatchQueue.main.async {
            self.PrintWords(words: words)
        }
    }

    // MARK: - Output

    /// Prints the recognized words to the console.
    ///
    /// - Parameter words: The words to print.
    func PrintWords(words: [String]) {
        print(words)
    }

    // MARK: - Entry point

    /// Starts text detection and character recognition for `ciImage`.
    ///
    /// The image is also stored in `inputImage`, because `handleDetection`
    /// reads the image geometry from that property. The detection itself runs
    /// on a global queue; results arrive through the request handlers.
    ///
    /// - Parameter ciImage: The image to scan for text.
    func doOCR(ciImage: CIImage) {
        // Keep the handler's view of the image in sync with what is processed.
        inputImage = ciImage

        let handler = VNImageRequestHandler(ciImage: ciImage, options: [:])

        // We need a box for each detected character.
        textDetectionRequest.reportCharacterBoxes = true
        textDetectionRequest.preferBackgroundProcessing = false

        // Resolve the lazy property here so the background closure does not
        // need to touch `self`.
        let request = textDetectionRequest
        DispatchQueue.global(qos: .userInteractive).async {
            do {
                try handler.perform([request])
            } catch {
                print("Error: \(error)")
            }
        }
    }

    // MARK: - View lifecycle

    override func viewDidLoad() {
        super.viewDidLoad()

        // Load the sample image from the app bundle (try Sample2, Sample3 too).
        guard let loadedImage = UIImage(named: "Sample1.png") else {
            // A missing bundled resource is a packaging error.
            fatalError("Sample1.png is missing from the app bundle")
        }

        // Vision is fed a CIImage; no scaling is required.
        guard let ciImage = CIImage(image: loadedImage) else {
            print("cannot create a CIImage from Sample1.png")
            return
        }

        inputImage = ciImage
        doOCR(ciImage: ciImage)
    }

    override func didReceiveMemoryWarning() {
        super.didReceiveMemoryWarning()
        // Dispose of any resources that can be recreated.
    }
}

您可以在此处(here)找到完整的项目,其中包括经过训练的模型!

关于ios - 将 Vision VNTextObservation 转换为字符串,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/44533148/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com