gpt4 book ai didi

android - 在预览camerax android中获取所有文本框

转载 作者:行者123 更新时间:2023-12-05 00:02:39 30 4
gpt4 key购买 nike

我想分析我在预览相机中的框内的所有文本。但是我得到了错误的文本坐标。
image
文本是否位于框内,是通过 Rect.contains() 来判断的。

/**
 * Shows a CameraX preview and prints every recognized word whose top-left corner lies
 * inside the border view drawn over the preview.
 *
 * NOTE(review): the image-to-view mapping scales each axis independently. That is only
 * exact when the analysis frame and the preview cover the same field of view and the
 * preview is stretched to fill the view; confirm against the PreviewView scale type and
 * the resolutions actually selected on the device.
 */
class TestsPhotoscan : AppCompatActivity() {
    private lateinit var binding: ActivityMainBinding

    // Factors that map upright analysis-image pixels to viewFinder pixels.
    private var scaleX = 1F
    private var scaleY = 1F

    // Bounds of the border view, expressed relative to viewFinder's top-left corner so
    // they share a coordinate system with the values produced by translateX/translateY.
    private var rectCrop = Rect()
    private var cameraProvider: ProcessCameraProvider? = null
    private lateinit var cameraProviderListenableFuture: ListenableFuture<ProcessCameraProvider>

    // A single recognizer is shared by all frames instead of creating a new client per
    // frame; the holder lets onDestroy() close it only if it was ever created.
    private val recognizerHolder = lazy {
        TextRecognition.getClient(TextRecognizerOptions.DEFAULT_OPTIONS)
    }
    private val recognizer by recognizerHolder

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        binding = ActivityMainBinding.inflate(layoutInflater)
        setContentView(binding.root)

        cameraProviderListenableFuture = ProcessCameraProvider.getInstance(this)

        // Request camera permissions
        if (allPermissionsGranted()) {
            startCamera()
        } else {
            ActivityCompat.requestPermissions(
                this,
                REQUIRED_PERMISSIONS,
                REQUEST_CODE_PERMISSIONS
            )
        }

        // The border view has no size until the first layout pass, so capture its bounds
        // once from a one-shot global layout listener.
        binding.borderView.viewTreeObserver.addOnGlobalLayoutListener(object :
            ViewTreeObserver.OnGlobalLayoutListener {
            override fun onGlobalLayout() {
                binding.borderView.viewTreeObserver.removeOnGlobalLayoutListener(this)
                val borderOrigin = IntArray(2)
                val finderOrigin = IntArray(2)
                binding.borderView.getLocationOnScreen(borderOrigin)
                binding.viewFinder.getLocationOnScreen(finderOrigin)
                // Subtract the preview's own screen offset: recognized coordinates are
                // scaled into viewFinder space, not screen space.
                val left = borderOrigin[0] - finderOrigin[0]
                val top = borderOrigin[1] - finderOrigin[1]
                rectCrop = Rect(
                    left,
                    top,
                    left + binding.borderView.width,
                    top + binding.borderView.height
                )
            }
        })
    }

    override fun onDestroy() {
        // Release the recognizer only if a frame was ever processed.
        if (recognizerHolder.isInitialized()) {
            recognizer.close()
        }
        super.onDestroy()
    }

    /** Returns true when every permission in REQUIRED_PERMISSIONS has been granted. */
    private fun allPermissionsGranted() = REQUIRED_PERMISSIONS.all {
        ContextCompat.checkSelfPermission(
            this, it
        ) == PackageManager.PERMISSION_GRANTED
    }

    /** Waits for the camera provider, then binds the use cases once viewFinder is ready. */
    @SuppressLint("UnsafeExperimentalUsageError")
    private fun startCamera() {
        cameraProviderListenableFuture.addListener(Runnable {
            cameraProvider = cameraProviderListenableFuture.get()
            binding.viewFinder.post { setupCamera() }
        }, ContextCompat.getMainExecutor(this))
    }

    /** Builds the preview use case sized to the display and attached to viewFinder. */
    private fun buildPreviewUseCase(): Preview {
        val display = binding.viewFinder.display
        val metrics = DisplayMetrics().also { display.getMetrics(it) }

        return Preview.Builder()
            .setTargetRotation(display.rotation)
            .setTargetResolution(Size(metrics.widthPixels, metrics.heightPixels))
            .build()
            .apply {
                setSurfaceProvider(binding.viewFinder.surfaceProvider)
            }
    }

    /**
     * Binds the preview and image-analysis use cases to the back camera.
     *
     * Only called from startCamera() after the provider future has completed, so the
     * work runs directly instead of registering a second listener on the same future.
     */
    private fun setupCamera() {
        // Preview
        val preview = buildPreviewUseCase()

        val imageAnalyzer = ImageAnalysis.Builder()
            .build()
            .also { analysis ->
                analysis.setAnalyzer(ContextCompat.getMainExecutor(this)) { frame ->
                    processImage(frame)
                }
            }

        // Select back camera as a default
        val cameraSelector = CameraSelector.DEFAULT_BACK_CAMERA

        val useCaseGroup = UseCaseGroup.Builder()
            .addUseCase(preview)
            .addUseCase(imageAnalyzer)
            .build()

        try {
            // Unbind use cases before rebinding
            cameraProvider?.unbindAll()

            // Bind use cases to camera
            cameraProvider?.bindToLifecycle(
                this, cameraSelector, useCaseGroup
            )
        } catch (exc: Exception) {
            Log.e(TAG, "Use case binding failed", exc)
        }
    }

    /**
     * Runs text recognition on one analysis frame and closes the frame when done.
     *
     * A frame without a backing media image is closed immediately rather than crashing;
     * every frame must be closed so that it is released back to the analyzer.
     */
    @SuppressLint("UnsafeOptInUsageError")
    private fun processImage(imageProxy: ImageProxy) {
        val mediaImage = imageProxy.image
        if (mediaImage == null) {
            imageProxy.close()
            return
        }

        setScaleFactor(imageProxy)
        recognizeText(
            InputImage.fromMediaImage(mediaImage, imageProxy.imageInfo.rotationDegrees)
        ).addOnCompleteListener { imageProxy.close() }
    }

    /**
     * Updates scaleX/scaleY from the frame size and the current viewFinder size.
     *
     * The recognizer is given the frame's rotation, so the frame's width and height
     * trade places when that rotation is 90 or 270 degrees.
     */
    private fun setScaleFactor(imageProxy: ImageProxy) {
        val rotation = imageProxy.imageInfo.rotationDegrees
        val isSideways = rotation == 90 || rotation == 270
        val uprightWidth = (if (isSideways) imageProxy.height else imageProxy.width).toFloat()
        val uprightHeight = (if (isSideways) imageProxy.width else imageProxy.height).toFloat()

        scaleX = binding.viewFinder.width.toFloat() / uprightWidth
        scaleY = binding.viewFinder.height.toFloat() / uprightHeight
    }

    /**
     * Recognizes text in [image] and prints each word whose top-left corner, mapped into
     * viewFinder coordinates, falls inside rectCrop.
     *
     * @return the recognition task, so the caller can attach its own completion handling.
     */
    private fun recognizeText(image: InputImage): Task<Text> =
        recognizer.process(image)
            .addOnSuccessListener(ScopedExecutor(TaskExecutors.MAIN_THREAD)) { result ->
                result.textBlocks
                    .flatMap { it.lines }
                    .flatMap { it.elements }
                    .forEach { element ->
                        // Words without a bounding box cannot be located; skip them.
                        val box = element.boundingBox ?: return@forEach
                        val viewX = translateX(box.left).roundToInt()
                        val viewY = translateY(box.top).roundToInt()
                        if (rectCrop.contains(viewX, viewY)) {
                            println(element.text)
                        }
                    }
            }
            .addOnFailureListener { error ->
                Log.e(TAG, "Text recognition failed", error)
            }

    override fun onRequestPermissionsResult(
        requestCode: Int,
        permissions: Array<out String>,
        grantResults: IntArray
    ) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults)
        if (requestCode != REQUEST_CODE_PERMISSIONS) return

        if (allPermissionsGranted()) {
            startCamera()
        } else {
            Toast.makeText(
                this,
                "Permissions not granted by the user.",
                Toast.LENGTH_SHORT
            ).show()
            // finish()
        }
    }

    /** Maps an x coordinate of the upright analysis image to viewFinder pixels. */
    fun translateX(x: Int) = x * scaleX

    /** Maps a y coordinate of the upright analysis image to viewFinder pixels. */
    fun translateY(y: Int) = y * scaleY

    companion object {
        private const val TAG = "Mytag"
        private const val REQUEST_CODE_PERMISSIONS = 10
        private val REQUIRED_PERMISSIONS = arrayOf(Manifest.permission.CAMERA)
    }
}
和布局
<?xml version="1.0" encoding="utf-8"?>
<!-- Full-screen camera preview with a fixed-height border view overlaid on it. -->
<layout xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:app="http://schemas.android.com/apk/res-auto">

    <androidx.constraintlayout.widget.ConstraintLayout
        android:id="@+id/root"
        android:layout_width="match_parent"
        android:layout_height="match_parent">

        <androidx.camera.view.PreviewView
            android:id="@+id/viewFinder"
            android:layout_width="0dp"
            android:layout_height="0dp"
            app:layout_constraintBottom_toBottomOf="parent"
            app:layout_constraintEnd_toEndOf="parent"
            app:layout_constraintStart_toStartOf="parent"
            app:layout_constraintTop_toTopOf="parent" />

        <!-- Width is 0dp (match constraints) rather than match_parent, which is not
             recommended for children of a ConstraintLayout. The constraint below
             references the existing viewFinder id instead of re-declaring it. -->
        <View
            android:id="@+id/border_view"
            android:layout_width="0dp"
            android:layout_height="250dp"
            android:layout_margin="16dp"
            android:background="@drawable/background_drawable"
            app:layout_constraintBottom_toBottomOf="@id/viewFinder"
            app:layout_constraintEnd_toEndOf="parent"
            app:layout_constraintStart_toStartOf="parent"
            app:layout_constraintTop_toTopOf="parent" />

    </androidx.constraintlayout.widget.ConstraintLayout>
</layout>

最佳答案

您遇到的困难在于:如何把 ImageProxy 中的图像准确地映射到 PreviewView 所显示的图像上。这听起来很容易,但我认为并没有直接的方法可以完成这种映射。请参见一个类似问题的回答。我尝试实现了该回答中的每一条建议,它们在某些情况下有效,但在另一些情况下会失败。当然,也可能是我采用的方法不对。
我得出的结论是,提取和分析从预览区域提取的位图并识别那些完全被红色矩形包围的单词是最简单的。我用它们自己的红色矩形框住这些词,以表明它们已被正确识别。
enter image description here
以下是重新设计的 Activity、用于绘制单词框的图形叠加层,以及用于显示的 XML 布局。说明都写在代码的注释里。祝你好运!
TestsPhotoscan.kt

/**
 * Activity that runs text recognition on the bitmap currently shown by the preview and
 * outlines every word lying completely inside the word fence.
 */
class TestsPhotoscan : AppCompatActivity() {
    private lateinit var binding: ActivityMainBinding

    // Scratch rectangle refilled with the word fence's hit rect on every result.
    private val wordFenceRect = Rect()
    private var cameraProvider: ProcessCameraProvider? = null
    private lateinit var cameraProviderListenableFuture: ListenableFuture<ProcessCameraProvider>

    // A single recognizer is shared by all frames instead of creating a new client per
    // frame; the holder lets onDestroy() close it only if it was ever created.
    private val recognizerHolder = lazy {
        TextRecognition.getClient(TextRecognizerOptions.DEFAULT_OPTIONS)
    }
    private val recognizer by recognizerHolder

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        binding = ActivityMainBinding.inflate(layoutInflater)
        setContentView(binding.root)

        cameraProviderListenableFuture = ProcessCameraProvider.getInstance(this)

        // Request camera permissions
        if (allPermissionsGranted()) {
            startCamera()
        } else {
            ActivityCompat.requestPermissions(
                this,
                REQUIRED_PERMISSIONS,
                REQUEST_CODE_PERMISSIONS
            )
        }
    }

    override fun onDestroy() {
        // Release the recognizer only if a frame was ever processed.
        if (recognizerHolder.isInitialized()) {
            recognizer.close()
        }
        super.onDestroy()
    }

    /** Returns true when every permission in REQUIRED_PERMISSIONS has been granted. */
    private fun allPermissionsGranted() = REQUIRED_PERMISSIONS.all {
        ContextCompat.checkSelfPermission(
            this, it
        ) == PackageManager.PERMISSION_GRANTED
    }

    /** Waits for the camera provider, then binds the use cases once viewFinder is ready. */
    @SuppressLint("UnsafeExperimentalUsageError")
    private fun startCamera() {
        cameraProviderListenableFuture.addListener({
            cameraProvider = cameraProviderListenableFuture.get()
            binding.viewFinder.post { setupCamera() }
        }, ContextCompat.getMainExecutor(this))
    }

    /** Builds the preview use case sized to the real display and attached to viewFinder. */
    private fun buildPreviewUseCase(): Preview {
        val display = binding.viewFinder.display
        val metrics = DisplayMetrics().also { display.getRealMetrics(it) }
        val rotation = display.rotation

        return Preview.Builder()
            .setTargetResolution(Size(metrics.widthPixels, metrics.heightPixels))
            .setTargetRotation(rotation)
            .build()
            .apply {
                setSurfaceProvider(binding.viewFinder.surfaceProvider)
            }
    }

    /**
     * Binds the preview and image-analysis use cases to the back camera.
     *
     * Only called from startCamera() after the provider future has completed, so the
     * work runs directly instead of registering a second listener on the same future.
     */
    @SuppressLint("UnsafeOptInUsageError")
    private fun setupCamera() {
        // Preview
        val preview = buildPreviewUseCase()

        val imageAnalyzer = ImageAnalysis.Builder()
            .build()
            .also { analysis ->
                analysis.setAnalyzer(ContextCompat.getMainExecutor(this)) { frame ->
                    processImage(frame)
                }
            }

        // Select back camera as a default
        val cameraSelector = CameraSelector.DEFAULT_BACK_CAMERA

        val useCaseGroup = UseCaseGroup.Builder()
            .addUseCase(preview)
            .addUseCase(imageAnalyzer)
            .build()

        try {
            // Unbind use cases before rebinding
            cameraProvider?.unbindAll()

            // Bind use cases to camera
            cameraProvider?.bindToLifecycle(
                this, cameraSelector, useCaseGroup
            )
        } catch (exc: Exception) {
            Log.e(TAG, "Use case binding failed", exc)
        }
    }

    /**
     * Handles one analysis frame: optionally mirrors it into the inset view, then runs
     * text recognition on the preview's own bitmap and closes the frame afterwards.
     *
     * The frame is closed on every path, including when the preview has no bitmap to
     * offer; a frame that is never closed is never released back to the analyzer.
     */
    @SuppressLint("UnsafeOptInUsageError")
    private fun processImage(imageProxy: ImageProxy) {
        // This code will display the image available in the ImageProxy within an inset view
        // if the inset view is visible to the user.
        //
        // The source for ImageUtils is at
        // https://github.com/googlesamples/mlkit/blob/master/android/translate-showcase/app/src/main/java/com/google/mlkit/showcase/translate/util/ImageUtils.kt
        if (binding.insetView.visibility == View.VISIBLE) {
            // Skip the inset update, rather than crash, when the frame has no media image.
            imageProxy.image?.let { mediaImage ->
                val uprightBitmap = rotateBitmap(
                    ImageUtils.convertYuv420888ImageToBitmap(mediaImage),
                    imageProxy.imageInfo.rotationDegrees.toFloat()
                )
                binding.insetView.setImageBitmap(uprightBitmap)
            }
        }

        // PreviewViews allow access to a bitmap representation of what the preview shows. This is
        // just a whole lot easier than mapping the ImageProxy image to what the PreviewView
        // displays on the screen. See https://stackoverflow.com/a/63912198/6287910
        val previewBitmap = binding.viewFinder.bitmap
        if (previewBitmap == null) {
            imageProxy.close()
            return
        }

        recognizeText(
            InputImage.fromBitmap(previewBitmap, 0)
        ).addOnCompleteListener { imageProxy.close() }
    }

    /**
     * Recognizes text in [image] and replaces the word fence's boxes with the outlines of
     * all words that lie entirely inside the fence.
     *
     * @return the recognition task, so the caller can attach its own completion handling.
     */
    private fun recognizeText(image: InputImage): Task<Text> =
        recognizer.process(image)
            .addOnSuccessListener(ScopedExecutor(TaskExecutors.MAIN_THREAD)) { result ->
                val fence = binding.wordFence
                fence.clearBoxes()
                fence.getHitRect(wordFenceRect)
                for (block in result.textBlocks) {
                    for (line in block.lines) {
                        for (element in line.elements) {
                            // For each word, check to make sure that the entire word is
                            // contained within the word fence.
                            val wordBox = element.boundingBox
                            if (!isRectWithinRect(wordBox, wordFenceRect)) continue

                            // Change the box boundary from the coordinate system of the
                            // parent to the coordinates of the word fence.
                            val outlineBox = Rect(wordBox)
                            outlineBox.offset(-fence.left, -fence.top)
                            fence.addBox(outlineBox)
                        }
                    }
                }
                fence.invalidate()
            }
            .addOnFailureListener { error ->
                Log.e(TAG, "Text recognition failed", error)
            }

    override fun onRequestPermissionsResult(
        requestCode: Int,
        permissions: Array<out String>,
        grantResults: IntArray
    ) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults)
        if (requestCode != REQUEST_CODE_PERMISSIONS) return

        if (allPermissionsGranted()) {
            startCamera()
        } else {
            Toast.makeText(
                this,
                "Permissions not granted by the user.",
                Toast.LENGTH_SHORT
            ).show()
            // finish()
        }
    }

    /** Returns true when [enclosedRect] is non-null and lies fully inside [enclosingRect]. */
    private fun isRectWithinRect(enclosedRect: Rect?, enclosingRect: Rect) =
        enclosedRect != null && enclosingRect.contains(enclosedRect)

    /** Returns a new bitmap holding [bitmap] rotated by [rotation] degrees. */
    private fun rotateBitmap(bitmap: Bitmap, rotation: Float) =
        Matrix().run {
            preRotate(rotation)
            Bitmap.createBitmap(
                bitmap, 0, 0, bitmap.width, bitmap.height, this, true
            )
        }

    companion object {
        private const val TAG = "Applog"
        private const val REQUEST_CODE_PERMISSIONS = 10
        private val REQUIRED_PERMISSIONS = arrayOf(Manifest.permission.CAMERA)
    }
}
BoxedWordView.kt
/**
 * View that strokes a caller-supplied list of rectangles in its foreground.
 *
 * Adding or clearing boxes does not trigger a redraw; the caller invalidates the view
 * once it has finished updating the list.
 */
class BoxedWordView @JvmOverloads constructor(
    context: Context,
    attrs: AttributeSet? = null,
    defStyleAttr: Int = 0
) : View(context, attrs, defStyleAttr) {

    // Rectangles to outline, in this view's own coordinate system.
    private val outlines = mutableListOf<Rect>()

    // Thin red stroke shared by every outline.
    private val outlinePaint = Paint().also { paint ->
        paint.strokeWidth = 2f
        paint.color = context.resources.getColor(android.R.color.holo_red_light)
        paint.style = Paint.Style.STROKE
    }

    override fun onDrawForeground(canvas: Canvas) {
        super.onDrawForeground(canvas)
        outlines.forEach { outline -> canvas.drawRect(outline, outlinePaint) }
    }

    /** Queues [box] to be outlined on the next draw. */
    fun addBox(box: Rect) {
        outlines += box
    }

    /** Removes every queued box. */
    fun clearBoxes() {
        outlines.clear()
    }
}
activity_main.xml
<?xml version="1.0" encoding="utf-8"?>
<!-- Camera preview with an optional inset image and the word-fence overlay.
     The android/app/tools namespaces are declared on the root element; without them
     the prefixed attributes below cannot be parsed. -->
<layout xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:app="http://schemas.android.com/apk/res-auto"
    xmlns:tools="http://schemas.android.com/tools">

    <androidx.constraintlayout.widget.ConstraintLayout
        android:id="@+id/root"
        android:layout_width="match_parent"
        android:layout_height="match_parent"
        android:background="@android:color/darker_gray">

        <androidx.camera.view.PreviewView
            android:id="@+id/viewFinder"
            android:layout_width="0dp"
            android:layout_height="0dp"
            app:layout_constraintBottom_toBottomOf="parent"
            app:layout_constraintEnd_toEndOf="parent"
            app:layout_constraintStart_toStartOf="parent"
            app:layout_constraintTop_toTopOf="parent" />

        <!-- Hidden by default; shows the raw analysis frame when made visible. -->
        <ImageView
            android:id="@+id/insetView"
            android:layout_width="wrap_content"
            android:layout_height="wrap_content"
            android:visibility="invisible"
            app:layout_constraintBottom_toBottomOf="parent"
            app:layout_constraintStart_toStartOf="parent"
            tools:srcCompat="@tools:sample/backgrounds/scenic" />

        <!-- All four constraints reference the existing viewFinder id. -->
        <com.example.textrecognition.BoxedWordView
            android:id="@+id/wordFence"
            android:layout_width="0dp"
            android:layout_height="250dp"
            android:layout_margin="16dp"
            android:background="@drawable/background_drawable"
            app:layout_constraintBottom_toBottomOf="@id/viewFinder"
            app:layout_constraintEnd_toEndOf="@id/viewFinder"
            app:layout_constraintStart_toStartOf="@id/viewFinder"
            app:layout_constraintTop_toTopOf="@id/viewFinder" />
    </androidx.constraintlayout.widget.ConstraintLayout>
</layout>
需要说明的是:从“最近任务”列表返回时,该应用有时会卡住。这个问题可能是我引入的,请留意。

关于android - 在预览camerax android中获取所有文本框,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/68730714/

30 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com