Capture an image and display the page preview with perspective correction

This commit is contained in:
Pierre-Yves Nicolas
2025-05-31 10:46:25 +02:00
parent 02cc4a7627
commit 453923b42d
7 changed files with 285 additions and 39 deletions

View File

@@ -8,6 +8,9 @@ import org.opencv.core.MatOfPoint2f
import org.opencv.core.Size import org.opencv.core.Size
import org.opencv.imgproc.Imgproc import org.opencv.imgproc.Imgproc
import kotlin.math.abs import kotlin.math.abs
import kotlin.math.max
import kotlin.math.sqrt
import androidx.core.graphics.createBitmap
fun detectDocumentQuad(mask: Bitmap): Quad? { fun detectDocumentQuad(mask: Bitmap): Quad? {
val mat = Mat() val mat = Mat()
@@ -47,3 +50,46 @@ fun detectDocumentQuad(mask: Bitmap): Quad? {
val vertices = biggest?.toList()?.map { Point(it.x.toInt(), it.y.toInt()) } val vertices = biggest?.toList()?.map { Point(it.x.toInt(), it.y.toInt()) }
return createQuad(vertices) return createQuad(vertices)
} }
/**
 * Extracts the region delimited by [quad] from [originalBitmap] and warps it
 * onto an upright rectangle (perspective correction).
 *
 * The output width is the longer of the quad's top/bottom edges and the output
 * height is the longer of its left/right edges, both truncated to whole pixels.
 *
 * @param originalBitmap image the quad coordinates refer to.
 * @param quad corners of the document, expressed in [originalBitmap] pixels.
 * @return a new bitmap containing the rectified document.
 */
fun extractDocument(originalBitmap: Bitmap, quad: Quad): Bitmap {
    // Clamp to 1 px so a very thin quad does not request a zero-sized output,
    // which bitmap creation rejects.
    val targetWidth = max(
        norm(quad.topLeft, quad.topRight),
        norm(quad.bottomLeft, quad.bottomRight),
    ).toInt().coerceAtLeast(1)
    val targetHeight = max(
        norm(quad.topLeft, quad.bottomLeft),
        norm(quad.topRight, quad.bottomRight),
    ).toInt().coerceAtLeast(1)
    val right = targetWidth.toDouble()
    val bottom = targetHeight.toDouble()

    // Corner order must match between source and destination:
    // top-left, top-right, bottom-right, bottom-left.
    val srcPoints = MatOfPoint2f(
        quad.topLeft.toCv(),
        quad.topRight.toCv(),
        quad.bottomRight.toCv(),
        quad.bottomLeft.toCv(),
    )
    val dstPoints = MatOfPoint2f(
        org.opencv.core.Point(0.0, 0.0),
        org.opencv.core.Point(right, 0.0),
        org.opencv.core.Point(right, bottom),
        org.opencv.core.Point(0.0, bottom),
    )
    val inputMat = Mat()
    val outputMat = Mat()
    var transform: Mat? = null
    try {
        transform = Imgproc.getPerspectiveTransform(srcPoints, dstPoints)
        Utils.bitmapToMat(originalBitmap, inputMat)
        Imgproc.warpPerspective(inputMat, outputMat, transform, Size(right, bottom))
        val outputBitmap = createBitmap(targetWidth, targetHeight)
        Utils.matToBitmap(outputMat, outputBitmap)
        return outputBitmap
    } finally {
        // Mats wrap native buffers; release them explicitly instead of waiting
        // for garbage collection, including when a step above throws.
        listOfNotNull(srcPoints, dstPoints, inputMat, outputMat, transform)
            .forEach { it.release() }
    }
}
/** Converts this point into an OpenCV point with `Double` coordinates. */
fun Point.toCv(): org.opencv.core.Point =
    org.opencv.core.Point(x.toDouble(), y.toDouble())
/**
 * Returns the Euclidean distance between [p1] and [p2].
 */
private fun norm(p1: Point, p2: Point): Double {
    // Widen each coordinate to Double before subtracting and squaring, so both
    // axes are handled the same way and no intermediate result is computed in
    // the (apparently integer — confirm against Point) coordinate type.
    val dx = p2.x.toDouble() - p1.x.toDouble()
    val dy = p2.y.toDouble() - p1.y.toDouble()
    return sqrt(dx * dx + dy * dy)
}

View File

@@ -55,30 +55,39 @@ class ImageSegmentationService(private val context: Context) {
} }
} }
suspend fun runSegmentation(bitmap: Bitmap, rotationDegrees: Int) { private fun runSegmentation(interpreter: Interpreter, bitmap: Bitmap, rotationDegrees: Int): SegmentationResult {
try {
withContext(Dispatchers.IO) {
if (interpreter == null) return@withContext
val startTime = SystemClock.uptimeMillis() val startTime = SystemClock.uptimeMillis()
val (_, _, h, w) = interpreter?.getInputTensor(0)?.shape() ?: return@withContext val (_, _, h, w) = interpreter.getInputTensor(0).shape()
val dataType = interpreter?.getInputTensor(0)?.dataType()
Log.i(TAG, "segment, input shape: ${interpreter!!.getInputTensor(0).shape().asList()} data type=${dataType}")
// Preprocess manually into CHW float buffer // Preprocess manually into CHW float buffer
val inputBuffer = bitmapToCHWFloatBuffer(bitmap, width = w, height = h, rotationDegrees) val inputBuffer = bitmapToCHWFloatBuffer(bitmap, width = w, height = h, rotationDegrees)
val (_, cOut, hOut, wOut) = interpreter!!.getOutputTensor(0).shape() val (_, cOut, hOut, wOut) = interpreter.getOutputTensor(0).shape()
val outputBuffer = FloatBuffer.allocate(cOut * hOut * wOut) val outputBuffer = FloatBuffer.allocate(cOut * hOut * wOut)
// Run inference // Run inference
outputBuffer.rewind() outputBuffer.rewind()
interpreter?.run(inputBuffer, outputBuffer) interpreter.run(inputBuffer, outputBuffer)
val inferenceTime = SystemClock.uptimeMillis() - startTime val inferenceTime = SystemClock.uptimeMillis() - startTime
if (isActive) {
val segmentResult = processOutputBuffer(outputBuffer, wOut, hOut, cOut) val segmentResult = processOutputBuffer(outputBuffer, wOut, hOut, cOut)
_segmentation.value = SegmentationResult(segmentResult, inferenceTime) return SegmentationResult(segmentResult, inferenceTime)
}
/**
 * Runs segmentation on [bitmap] and returns the result directly to the caller.
 *
 * @param bitmap image to segment.
 * @param rotationDegrees rotation forwarded to the preprocessing step.
 * @return the segmentation result, or `null` when no interpreter is available.
 */
fun runSegmentationAndReturn(bitmap: Bitmap, rotationDegrees: Int): SegmentationResult? =
    // Read the property once: a separate null check followed by `!!` can throw
    // if the interpreter is cleared between the two reads.
    interpreter?.let { runSegmentation(it, bitmap, rotationDegrees) }
suspend fun runSegmentationAndEmit(bitmap: Bitmap, rotationDegrees: Int) {
if (interpreter == null) return
try {
withContext(Dispatchers.IO) {
val segmentationResult = runSegmentation(interpreter!!, bitmap, rotationDegrees)
if (isActive) {
_segmentation.value = segmentationResult
} }
} }
} catch (e: Exception) { } catch (e: Exception) {

View File

@@ -7,13 +7,13 @@ import androidx.activity.compose.setContent
import androidx.activity.enableEdgeToEdge import androidx.activity.enableEdgeToEdge
import androidx.activity.viewModels import androidx.activity.viewModels
import androidx.compose.foundation.background import androidx.compose.foundation.background
import androidx.compose.foundation.layout.Box
import androidx.compose.foundation.layout.Column import androidx.compose.foundation.layout.Column
import androidx.compose.foundation.layout.fillMaxWidth import androidx.compose.foundation.layout.fillMaxWidth
import androidx.compose.foundation.layout.padding import androidx.compose.foundation.layout.padding
import androidx.compose.material3.Scaffold import androidx.compose.material3.Scaffold
import androidx.compose.material3.Text import androidx.compose.material3.Text
import androidx.compose.runtime.Composable import androidx.compose.runtime.Composable
import androidx.compose.runtime.collectAsState
import androidx.compose.runtime.getValue import androidx.compose.runtime.getValue
import androidx.compose.ui.Modifier import androidx.compose.ui.Modifier
import androidx.compose.ui.graphics.Color import androidx.compose.ui.graphics.Color
@@ -22,6 +22,7 @@ import androidx.compose.ui.unit.dp
import androidx.lifecycle.compose.collectAsStateWithLifecycle import androidx.lifecycle.compose.collectAsStateWithLifecycle
import org.mydomain.myscan.ui.theme.MyScanTheme import org.mydomain.myscan.ui.theme.MyScanTheme
import org.mydomain.myscan.view.CameraScreen import org.mydomain.myscan.view.CameraScreen
import org.mydomain.myscan.view.PagePreviewScreen
import org.opencv.android.OpenCVLoader import org.opencv.android.OpenCVLoader
class MainActivity : ComponentActivity() { class MainActivity : ComponentActivity() {
@@ -32,14 +33,27 @@ class MainActivity : ComponentActivity() {
val viewModel: MainViewModel by viewModels { MainViewModel.getFactory(this) } val viewModel: MainViewModel by viewModels { MainViewModel.getFactory(this) }
enableEdgeToEdge() enableEdgeToEdge()
setContent { setContent {
// TODO or collectAsStateWithLifecycle()?
val currentScreen by viewModel.currentScreen.collectAsState()
// TODO should uiState own currentScreen?
val uiState by viewModel.uiState.collectAsStateWithLifecycle() val uiState by viewModel.uiState.collectAsStateWithLifecycle()
MyScanTheme { MyScanTheme {
Scaffold { innerPadding -> Scaffold { innerPadding ->
Column { Column {
Greeting(modifier = Modifier.padding(innerPadding)) Greeting(modifier = Modifier.padding(innerPadding))
MyMessageBox(uiState.detectionMessage, uiState.inferenceTime) MyMessageBox(uiState.detectionMessage, uiState.inferenceTime)
Box { when (val screen = currentScreen) {
CameraScreen(uiState, onImageAnalyzed = { image -> viewModel.segment(image) } ) is Screen.Camera -> {
CameraScreen(viewModel, uiState,
onImageAnalyzed = { image -> viewModel.segment(image) } )
}
is Screen.PagePreview -> {
PagePreviewScreen (
image = screen.image,
isProcessing = screen.isProcessing,
onBackPressed = { viewModel.navigateTo(Screen.Camera) }
)
}
} }
} }
} }

View File

@@ -1,17 +1,22 @@
package org.mydomain.myscan package org.mydomain.myscan
import android.content.Context import android.content.Context
import android.graphics.Bitmap
import android.graphics.Matrix
import android.util.Log
import androidx.camera.core.ImageProxy import androidx.camera.core.ImageProxy
import androidx.lifecycle.ViewModel import androidx.lifecycle.ViewModel
import androidx.lifecycle.ViewModelProvider import androidx.lifecycle.ViewModelProvider
import androidx.lifecycle.viewModelScope import androidx.lifecycle.viewModelScope
import androidx.lifecycle.viewmodel.CreationExtras import androidx.lifecycle.viewmodel.CreationExtras
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.flow.MutableStateFlow import kotlinx.coroutines.flow.MutableStateFlow
import kotlinx.coroutines.flow.StateFlow import kotlinx.coroutines.flow.StateFlow
import kotlinx.coroutines.flow.asStateFlow import kotlinx.coroutines.flow.asStateFlow
import kotlinx.coroutines.flow.filterNotNull import kotlinx.coroutines.flow.filterNotNull
import kotlinx.coroutines.flow.map import kotlinx.coroutines.flow.map
import kotlinx.coroutines.launch import kotlinx.coroutines.launch
import kotlinx.coroutines.withContext
class MainViewModel(private val imageSegmentationService: ImageSegmentationService): ViewModel() { class MainViewModel(private val imageSegmentationService: ImageSegmentationService): ViewModel() {
@@ -26,6 +31,9 @@ class MainViewModel(private val imageSegmentationService: ImageSegmentationServi
private var _uiState = MutableStateFlow(UiState("just started")) private var _uiState = MutableStateFlow(UiState("just started"))
val uiState: StateFlow<UiState> = _uiState.asStateFlow() val uiState: StateFlow<UiState> = _uiState.asStateFlow()
private val _currentScreen = MutableStateFlow<Screen>(Screen.Camera)
val currentScreen: StateFlow<Screen> = _currentScreen.asStateFlow()
init { init {
viewModelScope.launch { viewModelScope.launch {
imageSegmentationService.initialize() imageSegmentationService.initialize()
@@ -48,7 +56,7 @@ class MainViewModel(private val imageSegmentationService: ImageSegmentationServi
fun segment(imageProxy: ImageProxy) { fun segment(imageProxy: ImageProxy) {
viewModelScope.launch { viewModelScope.launch {
imageSegmentationService.runSegmentation( imageSegmentationService.runSegmentationAndEmit(
imageProxy.toBitmap(), imageProxy.toBitmap(),
imageProxy.imageInfo.rotationDegrees, imageProxy.imageInfo.rotationDegrees,
) )
@@ -56,4 +64,38 @@ class MainViewModel(private val imageSegmentationService: ImageSegmentationServi
} }
} }
/**
 * Makes [screen] the current screen by publishing it on the current-screen
 * state flow.
 */
fun navigateTo(screen: Screen) {
    _currentScreen.value = screen
}
/**
 * Shows the page preview in its "processing" state, processes the captured
 * image in the background, then shows the preview with the result (or with no
 * image when processing produced none).
 *
 * Takes ownership of [imageProxy]: it is closed once processing has finished,
 * whether it succeeded or failed.
 *
 * @param imageProxy image delivered by the camera capture callback.
 */
fun processCapturedImageAndNavigate(imageProxy: ImageProxy) {
    viewModelScope.launch {
        Log.d("MyScan", "Navigating to spinner")
        navigateTo(Screen.PagePreview(image = null, isProcessing = true))
        val processedImage = try {
            processCapturedImage(imageProxy)
        } finally {
            // Captured images must be closed by the application; leaving them
            // open keeps the underlying camera buffer allocated.
            imageProxy.close()
        }
        Log.d("MyScan", "Navigating to result image")
        navigateTo(Screen.PagePreview(image = processedImage, isProcessing = false))
    }
}
/**
 * Converts the captured image to an upright bitmap, segments it, detects the
 * document quad in the resulting mask and returns the perspective-corrected
 * document.
 *
 * Runs on [Dispatchers.IO].
 *
 * @return the corrected bitmap, or `null` when segmentation is unavailable or
 * no quad is detected.
 */
private suspend fun processCapturedImage(imageProxy: ImageProxy): Bitmap? = withContext(Dispatchers.IO) {
    // The bitmap is rotated up front, so segmentation is asked for no extra rotation.
    val upright = imageProxy.toBitmap().rotate(imageProxy.imageInfo.rotationDegrees)
    val result = imageSegmentationService.runSegmentationAndReturn(upright, 0)
        ?: return@withContext null

    val mask = result.segmentation.toBinaryMask()
    val detected = detectDocumentQuad(mask) ?: return@withContext null

    // The quad is expressed in mask coordinates; rescale it to the full-size bitmap.
    val quadInBitmap = detected.scaledTo(mask.width, mask.height, upright.width, upright.height)
    extractDocument(upright, quadInBitmap)
}
/**
 * Returns this bitmap rotated by [degrees].
 *
 * When [degrees] is 0 the receiver itself is returned; otherwise a new bitmap
 * is created with filtering enabled.
 */
fun Bitmap.rotate(degrees: Int): Bitmap =
    if (degrees == 0) {
        this
    } else {
        val rotation = Matrix().also { it.postRotate(degrees.toFloat()) }
        Bitmap.createBitmap(this, 0, 0, width, height, rotation, true)
    }
} }

View File

@@ -0,0 +1,11 @@
package org.mydomain.myscan
import android.graphics.Bitmap
/** The screens the app can display. */
sealed class Screen {

    /** Camera screen. */
    object Camera : Screen()

    /**
     * Preview of a captured page.
     *
     * @property image bitmap to display, or `null` when none is available.
     * @property isProcessing `true` while the captured image is still being processed.
     */
    data class PagePreview(
        val image: Bitmap? = null,
        val isProcessing: Boolean = true,
    ) : Screen()
}

View File

@@ -1,7 +1,9 @@
package org.mydomain.myscan.view package org.mydomain.myscan.view
import android.content.Context
import android.content.pm.PackageManager.PERMISSION_GRANTED import android.content.pm.PackageManager.PERMISSION_GRANTED
import android.graphics.Bitmap import android.graphics.Bitmap
import android.util.Log
import android.view.ViewGroup.LayoutParams.MATCH_PARENT import android.view.ViewGroup.LayoutParams.MATCH_PARENT
import android.widget.LinearLayout import android.widget.LinearLayout
import android.widget.Toast import android.widget.Toast
@@ -10,21 +12,27 @@ import androidx.activity.result.contract.ActivityResultContracts
import androidx.camera.core.AspectRatio.RATIO_4_3 import androidx.camera.core.AspectRatio.RATIO_4_3
import androidx.camera.core.CameraSelector import androidx.camera.core.CameraSelector
import androidx.camera.core.ImageAnalysis import androidx.camera.core.ImageAnalysis
import androidx.camera.core.ImageCapture
import androidx.camera.core.ImageCaptureException
import androidx.camera.core.ImageProxy import androidx.camera.core.ImageProxy
import androidx.camera.core.Preview import androidx.camera.core.Preview
import androidx.camera.lifecycle.ProcessCameraProvider import androidx.camera.lifecycle.ProcessCameraProvider
import androidx.camera.view.PreviewView import androidx.camera.view.PreviewView
import androidx.compose.foundation.Canvas import androidx.compose.foundation.Canvas
import androidx.compose.foundation.layout.Box import androidx.compose.foundation.layout.Box
import androidx.compose.foundation.layout.Column
import androidx.compose.foundation.layout.fillMaxSize import androidx.compose.foundation.layout.fillMaxSize
import androidx.compose.foundation.layout.height import androidx.compose.foundation.layout.height
import androidx.compose.foundation.layout.width import androidx.compose.foundation.layout.width
import androidx.compose.material3.Button
import androidx.compose.material3.Text
import androidx.compose.runtime.Composable import androidx.compose.runtime.Composable
import androidx.compose.runtime.DisposableEffect import androidx.compose.runtime.DisposableEffect
import androidx.compose.runtime.LaunchedEffect import androidx.compose.runtime.LaunchedEffect
import androidx.compose.runtime.getValue import androidx.compose.runtime.getValue
import androidx.compose.runtime.mutableStateOf import androidx.compose.runtime.mutableStateOf
import androidx.compose.runtime.remember import androidx.compose.runtime.remember
import androidx.compose.ui.Alignment
import androidx.compose.ui.Modifier import androidx.compose.ui.Modifier
import androidx.compose.ui.geometry.Offset import androidx.compose.ui.geometry.Offset
import androidx.compose.ui.graphics.BlendMode import androidx.compose.ui.graphics.BlendMode
@@ -37,18 +45,20 @@ import androidx.compose.ui.platform.LocalContext
import androidx.compose.ui.unit.dp import androidx.compose.ui.unit.dp
import androidx.compose.ui.viewinterop.AndroidView import androidx.compose.ui.viewinterop.AndroidView
import androidx.core.content.ContextCompat import androidx.core.content.ContextCompat
import androidx.core.graphics.scale
import androidx.lifecycle.LifecycleOwner import androidx.lifecycle.LifecycleOwner
import androidx.lifecycle.compose.LocalLifecycleOwner import androidx.lifecycle.compose.LocalLifecycleOwner
import com.google.common.util.concurrent.ListenableFuture import com.google.common.util.concurrent.ListenableFuture
import org.mydomain.myscan.MainViewModel
import org.mydomain.myscan.Point
import org.mydomain.myscan.UiState import org.mydomain.myscan.UiState
import org.mydomain.myscan.scaledTo
import java.util.concurrent.ExecutorService import java.util.concurrent.ExecutorService
import java.util.concurrent.Executors import java.util.concurrent.Executors
import androidx.core.graphics.scale
import org.mydomain.myscan.Point
import org.mydomain.myscan.scaledTo
@Composable @Composable
fun CameraScreen( fun CameraScreen(
viewModel: MainViewModel,
uiState: UiState, uiState: UiState,
onImageAnalyzed: (ImageProxy) -> Unit, onImageAnalyzed: (ImageProxy) -> Unit,
) { ) {
@@ -62,6 +72,8 @@ fun CameraScreen(
} }
} }
val captureController = remember { CameraCaptureController() }
LaunchedEffect(Unit) { LaunchedEffect(Unit) {
val camera = android.Manifest.permission.CAMERA val camera = android.Manifest.permission.CAMERA
if (ContextCompat.checkSelfPermission(context, camera) != PERMISSION_GRANTED) { if (ContextCompat.checkSelfPermission(context, camera) != PERMISSION_GRANTED) {
@@ -69,6 +81,7 @@ fun CameraScreen(
} }
} }
Column {
val width = LocalConfiguration.current.screenWidthDp val width = LocalConfiguration.current.screenWidthDp
val height = width / 3 * 4 val height = width / 3 * 4
Box( Box(
@@ -76,15 +89,35 @@ fun CameraScreen(
.width(width.dp) .width(width.dp)
.height(height.dp) .height(height.dp)
) { ) {
CameraPreview(onImageAnalyzed = onImageAnalyzed) CameraPreview(
onImageAnalyzed = onImageAnalyzed,
captureController = captureController)
AnalysisOverlay(uiState) AnalysisOverlay(uiState)
} }
Button(
onClick = {
captureController.takePicture(
context = context,
onImageCaptured = { imageProxy ->
if (imageProxy != null) {
viewModel.processCapturedImageAndNavigate(imageProxy)
} else {
Log.e("MyScan", "Error during image capture")
}
}
)},
modifier = Modifier.align(Alignment.CenterHorizontally),
) {
Text("Capture")
}
}
} }
@Composable @Composable
fun CameraPreview( fun CameraPreview(
modifier: Modifier = Modifier, modifier: Modifier = Modifier,
onImageAnalyzed: (ImageProxy) -> Unit, onImageAnalyzed: (ImageProxy) -> Unit,
captureController: CameraCaptureController,
) { ) {
val context = LocalContext.current val context = LocalContext.current
val lifecycleOwner = LocalLifecycleOwner.current val lifecycleOwner = LocalLifecycleOwner.current
@@ -110,7 +143,8 @@ fun CameraPreview(
cameraProviderFuture = cameraProviderFuture, cameraProviderFuture = cameraProviderFuture,
executor = executor, executor = executor,
previewView = previewView, previewView = previewView,
onImageAnalyzed = onImageAnalyzed onImageAnalyzed = onImageAnalyzed,
captureController = captureController
) )
}, ContextCompat.getMainExecutor(context)) }, ContextCompat.getMainExecutor(context))
@@ -124,6 +158,7 @@ fun bindCameraUseCases(
executor: ExecutorService, executor: ExecutorService,
previewView: PreviewView, previewView: PreviewView,
onImageAnalyzed: (ImageProxy) -> Unit, onImageAnalyzed: (ImageProxy) -> Unit,
captureController: CameraCaptureController,
) { ) {
val preview: Preview = Preview.Builder().setTargetAspectRatio(RATIO_4_3).build() val preview: Preview = Preview.Builder().setTargetAspectRatio(RATIO_4_3).build()
@@ -136,8 +171,14 @@ fun bindCameraUseCases(
.setOutputImageFormat(ImageAnalysis.OUTPUT_IMAGE_FORMAT_RGBA_8888).build() .setOutputImageFormat(ImageAnalysis.OUTPUT_IMAGE_FORMAT_RGBA_8888).build()
imageAnalysis.setAnalyzer(executor, onImageAnalyzed) imageAnalysis.setAnalyzer(executor, onImageAnalyzed)
val imageCapture = ImageCapture.Builder()
.setTargetAspectRatio(RATIO_4_3)
.setCaptureMode(ImageCapture.CAPTURE_MODE_MINIMIZE_LATENCY)
.build()
captureController.imageCapture = imageCapture
val cameraProvider = cameraProviderFuture.get() val cameraProvider = cameraProviderFuture.get()
cameraProvider.bindToLifecycle(lifecycleOwner, cameraSelector, imageAnalysis, preview) cameraProvider.bindToLifecycle(lifecycleOwner, cameraSelector, imageAnalysis, preview, imageCapture)
} }
@Composable @Composable
@@ -187,3 +228,23 @@ fun replaceColor(bitmap: Bitmap, toReplace: Color, replacement: Color): Bitmap {
} }
fun Point.toOffset() = Offset(x.toFloat(), y.toFloat()) fun Point.toOffset() = Offset(x.toFloat(), y.toFloat())
/**
 * Holds the [ImageCapture] use case so that UI code can trigger a capture.
 *
 * [imageCapture] stays `null` until it is assigned by whoever creates the use case.
 */
class CameraCaptureController {
    var imageCapture: ImageCapture? = null

    /**
     * Captures a single image.
     *
     * @param context used to obtain the main executor on which the capture callbacks run.
     * @param onImageCaptured receives the captured image, or `null` when the capture
     * failed or no [imageCapture] has been set yet. In the latter case it is invoked
     * synchronously on the calling thread.
     */
    fun takePicture(context: Context, onImageCaptured: (ImageProxy?) -> Unit) {
        val capture = imageCapture
        if (capture == null) {
            // Report the failure instead of silently ignoring the request, so the
            // caller always gets exactly one callback.
            Log.e("CameraCapture", "Image capture requested but no ImageCapture use case is set")
            onImageCaptured(null)
            return
        }
        capture.takePicture(
            // TODO is it a good idea to use this executor?
            ContextCompat.getMainExecutor(context),
            object : ImageCapture.OnImageCapturedCallback() {
                override fun onCaptureSuccess(imageProxy: ImageProxy) {
                    onImageCaptured(imageProxy)
                }

                override fun onError(exception: ImageCaptureException) {
                    Log.e("CameraCapture", "Image capture failed: ${exception.message}", exception)
                    onImageCaptured(null)
                }
            }
        )
    }
}

View File

@@ -0,0 +1,63 @@
package org.mydomain.myscan.view
import android.graphics.Bitmap
import androidx.compose.foundation.Image
import androidx.compose.foundation.layout.Box
import androidx.compose.foundation.layout.fillMaxSize
import androidx.compose.foundation.layout.padding
import androidx.compose.material.icons.Icons
import androidx.compose.material.icons.automirrored.filled.ArrowBack
import androidx.compose.material3.CircularProgressIndicator
import androidx.compose.material3.Icon
import androidx.compose.material3.IconButton
import androidx.compose.material3.Text
import androidx.compose.runtime.Composable
import androidx.compose.ui.Alignment
import androidx.compose.ui.Modifier
import androidx.compose.ui.graphics.asImageBitmap
import androidx.compose.ui.layout.ContentScale
import androidx.compose.ui.unit.dp
/**
 * Full-screen preview of a captured page.
 *
 * Shows a progress indicator while [isProcessing] is true, otherwise [image]
 * when it is non-null, otherwise a "no image" message. A back button is drawn
 * in the top-start corner in every state.
 *
 * @param image processed page to display, if any.
 * @param isProcessing whether processing is still running.
 * @param onBackPressed invoked when the back button is clicked.
 */
@Composable
fun PagePreviewScreen(
    image: Bitmap?,
    isProcessing: Boolean,
    onBackPressed: () -> Unit
) {
    Box(modifier = Modifier.fillMaxSize()) {
        // Shared by the spinner and the fallback message.
        val centered = Modifier.align(Alignment.Center)

        when {
            isProcessing -> CircularProgressIndicator(modifier = centered)

            image != null -> Image(
                bitmap = image.asImageBitmap(),
                contentDescription = "Document preview",
                modifier = Modifier.fillMaxSize(),
                contentScale = ContentScale.Fit
            )

            else -> Text(text = "No image is available.", modifier = centered)
        }

        // Declared last so it is drawn on top of the content above.
        val backButtonModifier = Modifier
            .align(Alignment.TopStart)
            .padding(16.dp)
        IconButton(onClick = onBackPressed, modifier = backButtonModifier) {
            Icon(
                imageVector = Icons.AutoMirrored.Filled.ArrowBack,
                contentDescription = "Back"
            )
        }
    }
}