Capture an image and display the page preview with perspective correction

This commit is contained in:
Pierre-Yves Nicolas
2025-05-31 10:46:25 +02:00
parent 02cc4a7627
commit 453923b42d
7 changed files with 285 additions and 39 deletions

View File

@@ -8,6 +8,9 @@ import org.opencv.core.MatOfPoint2f
import org.opencv.core.Size
import org.opencv.imgproc.Imgproc
import kotlin.math.abs
import kotlin.math.max
import kotlin.math.sqrt
import androidx.core.graphics.createBitmap
fun detectDocumentQuad(mask: Bitmap): Quad? {
val mat = Mat()
@@ -47,3 +50,46 @@ fun detectDocumentQuad(mask: Bitmap): Quad? {
val vertices = biggest?.toList()?.map { Point(it.x.toInt(), it.y.toInt()) }
return createQuad(vertices)
}
/**
 * Cuts the page delimited by [quad] out of [originalBitmap] and straightens it with a
 * perspective warp.
 *
 * The output width is the longer of the quad's top and bottom edges, the output height the
 * longer of its left and right edges. Both are truncated to whole pixels and clamped to at
 * least 1 so that a degenerate quad cannot request a zero-sized image.
 *
 * @param originalBitmap image the quad corners refer to
 * @param quad corners of the page, used as source points of the warp applied to [originalBitmap]
 * @return a newly created bitmap holding the perspective-corrected page
 */
fun extractDocument(originalBitmap: Bitmap, quad: Quad): Bitmap {
    val widthTop = norm(quad.topLeft, quad.topRight)
    val widthBottom = norm(quad.bottomLeft, quad.bottomRight)
    val maxWidth = max(widthTop, widthBottom).toInt().coerceAtLeast(1)

    val heightLeft = norm(quad.topLeft, quad.bottomLeft)
    val heightRight = norm(quad.topRight, quad.bottomRight)
    val maxHeight = max(heightLeft, heightRight).toInt().coerceAtLeast(1)

    // Corner order (TL, TR, BR, BL) must be the same in source and destination.
    val srcPoints = MatOfPoint2f(
        quad.topLeft.toCv(),
        quad.topRight.toCv(),
        quad.bottomRight.toCv(),
        quad.bottomLeft.toCv(),
    )
    val dstPoints = MatOfPoint2f(
        org.opencv.core.Point(0.0, 0.0),
        org.opencv.core.Point(maxWidth.toDouble(), 0.0),
        org.opencv.core.Point(maxWidth.toDouble(), maxHeight.toDouble()),
        org.opencv.core.Point(0.0, maxHeight.toDouble()),
    )
    val inputMat = Mat()
    val outputMat = Mat()
    var transform: Mat? = null
    try {
        transform = Imgproc.getPerspectiveTransform(srcPoints, dstPoints)
        Utils.bitmapToMat(originalBitmap, inputMat)
        Imgproc.warpPerspective(
            inputMat,
            outputMat,
            transform,
            Size(maxWidth.toDouble(), maxHeight.toDouble()),
        )
        val outputBitmap = createBitmap(maxWidth, maxHeight)
        Utils.matToBitmap(outputMat, outputBitmap)
        return outputBitmap
    } finally {
        // Mats wrap native buffers; release them explicitly instead of waiting
        // for the garbage collector, including when one of the calls above throws.
        transform?.release()
        outputMat.release()
        inputMat.release()
        dstPoints.release()
        srcPoints.release()
    }
}
/** Converts this point to an OpenCV point carrying the same coordinates as doubles. */
fun Point.toCv(): org.opencv.core.Point =
    org.opencv.core.Point(x.toDouble(), y.toDouble())
/**
 * Euclidean distance between [p1] and [p2].
 *
 * Both deltas are widened to [Double] before being squared, so neither square is computed
 * in the coordinates' own arithmetic (where, for integer coordinates, a large delta could
 * overflow).
 */
private fun norm(p1: Point, p2: Point): Double {
    val dx = (p2.x - p1.x).toDouble()
    val dy = (p2.y - p1.y).toDouble()
    return sqrt(dx * dx + dy * dy)
}

View File

@@ -55,30 +55,39 @@ class ImageSegmentationService(private val context: Context) {
}
}
suspend fun runSegmentation(bitmap: Bitmap, rotationDegrees: Int) {
try {
withContext(Dispatchers.IO) {
if (interpreter == null) return@withContext
private fun runSegmentation(interpreter: Interpreter, bitmap: Bitmap, rotationDegrees: Int): SegmentationResult {
val startTime = SystemClock.uptimeMillis()
val (_, _, h, w) = interpreter?.getInputTensor(0)?.shape() ?: return@withContext
val dataType = interpreter?.getInputTensor(0)?.dataType()
Log.i(TAG, "segment, input shape: ${interpreter!!.getInputTensor(0).shape().asList()} data type=${dataType}")
val (_, _, h, w) = interpreter.getInputTensor(0).shape()
// Preprocess manually into CHW float buffer
val inputBuffer = bitmapToCHWFloatBuffer(bitmap, width = w, height = h, rotationDegrees)
val (_, cOut, hOut, wOut) = interpreter!!.getOutputTensor(0).shape()
val (_, cOut, hOut, wOut) = interpreter.getOutputTensor(0).shape()
val outputBuffer = FloatBuffer.allocate(cOut * hOut * wOut)
// Run inference
outputBuffer.rewind()
interpreter?.run(inputBuffer, outputBuffer)
interpreter.run(inputBuffer, outputBuffer)
val inferenceTime = SystemClock.uptimeMillis() - startTime
if (isActive) {
val segmentResult = processOutputBuffer(outputBuffer, wOut, hOut, cOut)
_segmentation.value = SegmentationResult(segmentResult, inferenceTime)
return SegmentationResult(segmentResult, inferenceTime)
}
/**
 * Runs segmentation on [bitmap] on the calling thread and returns the result directly
 * instead of publishing it.
 *
 * @param bitmap image to segment
 * @param rotationDegrees rotation forwarded to the preprocessing step
 * @return the segmentation result, or `null` when no interpreter is available
 */
fun runSegmentationAndReturn(bitmap: Bitmap, rotationDegrees: Int): SegmentationResult? {
    // Read the property once: a null check followed by `!!` would read it twice and
    // could throw if it is cleared in between.
    val readyInterpreter = interpreter ?: return null
    return runSegmentation(readyInterpreter, bitmap, rotationDegrees)
}
suspend fun runSegmentationAndEmit(bitmap: Bitmap, rotationDegrees: Int) {
if (interpreter == null) return
try {
withContext(Dispatchers.IO) {
val segmentationResult = runSegmentation(interpreter!!, bitmap, rotationDegrees)
if (isActive) {
_segmentation.value = segmentationResult
}
}
} catch (e: Exception) {

View File

@@ -7,13 +7,13 @@ import androidx.activity.compose.setContent
import androidx.activity.enableEdgeToEdge
import androidx.activity.viewModels
import androidx.compose.foundation.background
import androidx.compose.foundation.layout.Box
import androidx.compose.foundation.layout.Column
import androidx.compose.foundation.layout.fillMaxWidth
import androidx.compose.foundation.layout.padding
import androidx.compose.material3.Scaffold
import androidx.compose.material3.Text
import androidx.compose.runtime.Composable
import androidx.compose.runtime.collectAsState
import androidx.compose.runtime.getValue
import androidx.compose.ui.Modifier
import androidx.compose.ui.graphics.Color
@@ -22,6 +22,7 @@ import androidx.compose.ui.unit.dp
import androidx.lifecycle.compose.collectAsStateWithLifecycle
import org.mydomain.myscan.ui.theme.MyScanTheme
import org.mydomain.myscan.view.CameraScreen
import org.mydomain.myscan.view.PagePreviewScreen
import org.opencv.android.OpenCVLoader
class MainActivity : ComponentActivity() {
@@ -32,14 +33,27 @@ class MainActivity : ComponentActivity() {
val viewModel: MainViewModel by viewModels { MainViewModel.getFactory(this) }
enableEdgeToEdge()
setContent {
// TODO or collectAsStateWithLifecycle()?
val currentScreen by viewModel.currentScreen.collectAsState()
// TODO should uiState own currentScreen?
val uiState by viewModel.uiState.collectAsStateWithLifecycle()
MyScanTheme {
Scaffold { innerPadding ->
Column {
Greeting(modifier = Modifier.padding(innerPadding))
MyMessageBox(uiState.detectionMessage, uiState.inferenceTime)
Box {
CameraScreen(uiState, onImageAnalyzed = { image -> viewModel.segment(image) } )
when (val screen = currentScreen) {
is Screen.Camera -> {
CameraScreen(viewModel, uiState,
onImageAnalyzed = { image -> viewModel.segment(image) } )
}
is Screen.PagePreview -> {
PagePreviewScreen (
image = screen.image,
isProcessing = screen.isProcessing,
onBackPressed = { viewModel.navigateTo(Screen.Camera) }
)
}
}
}
}

View File

@@ -1,17 +1,22 @@
package org.mydomain.myscan
import android.content.Context
import android.graphics.Bitmap
import android.graphics.Matrix
import android.util.Log
import androidx.camera.core.ImageProxy
import androidx.lifecycle.ViewModel
import androidx.lifecycle.ViewModelProvider
import androidx.lifecycle.viewModelScope
import androidx.lifecycle.viewmodel.CreationExtras
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.flow.MutableStateFlow
import kotlinx.coroutines.flow.StateFlow
import kotlinx.coroutines.flow.asStateFlow
import kotlinx.coroutines.flow.filterNotNull
import kotlinx.coroutines.flow.map
import kotlinx.coroutines.launch
import kotlinx.coroutines.withContext
class MainViewModel(private val imageSegmentationService: ImageSegmentationService): ViewModel() {
@@ -26,6 +31,9 @@ class MainViewModel(private val imageSegmentationService: ImageSegmentationServi
private var _uiState = MutableStateFlow(UiState("just started"))
val uiState: StateFlow<UiState> = _uiState.asStateFlow()
private val _currentScreen = MutableStateFlow<Screen>(Screen.Camera)
val currentScreen: StateFlow<Screen> = _currentScreen.asStateFlow()
init {
viewModelScope.launch {
imageSegmentationService.initialize()
@@ -48,7 +56,7 @@ class MainViewModel(private val imageSegmentationService: ImageSegmentationServi
fun segment(imageProxy: ImageProxy) {
viewModelScope.launch {
imageSegmentationService.runSegmentation(
imageSegmentationService.runSegmentationAndEmit(
imageProxy.toBitmap(),
imageProxy.imageInfo.rotationDegrees,
)
@@ -56,4 +64,38 @@ class MainViewModel(private val imageSegmentationService: ImageSegmentationServi
}
}
/** Makes [screen] the currently displayed screen by publishing it on [currentScreen]. */
fun navigateTo(screen: Screen) {
    _currentScreen.value = screen
}
/**
 * Shows the page preview in its "processing" state, processes [imageProxy] and then
 * shows the processed image.
 *
 * The result is only displayed if the processing placeholder shown by this call is still
 * the current screen. If the user navigated elsewhere in the meantime (for example with
 * the back button), the result is dropped instead of pulling them back to the preview.
 *
 * @param imageProxy the freshly captured image
 */
fun processCapturedImageAndNavigate(imageProxy: ImageProxy) {
    viewModelScope.launch {
        Log.d("MyScan", "Navigating to spinner")
        val processingScreen = Screen.PagePreview(image = null, isProcessing = true)
        navigateTo(processingScreen)
        val processedImage = processCapturedImage(imageProxy)
        // Identity check on purpose: another capture may have published an equal-looking
        // placeholder, and only the call that owns the current one should resolve it.
        if (_currentScreen.value !== processingScreen) {
            Log.d("MyScan", "Processing screen was left, dropping result image")
            return@launch
        }
        Log.d("MyScan", "Navigating to result image")
        navigateTo(Screen.PagePreview(image = processedImage, isProcessing = false))
    }
}
/**
 * Turns a captured image into a perspective-corrected page, on [Dispatchers.IO].
 *
 * Steps: convert to an upright bitmap, run segmentation, detect the document quad on the
 * binary mask, scale the quad from mask size to bitmap size, then extract the document.
 *
 * [imageProxy] is always closed by this function, as soon as its pixels have been copied
 * into a bitmap, so the camera can reuse the underlying buffer.
 *
 * @param imageProxy the captured image; must not be used by the caller afterwards
 * @return the corrected page, or `null` when segmentation is unavailable or no quad is found
 */
private suspend fun processCapturedImage(imageProxy: ImageProxy): Bitmap? = withContext(Dispatchers.IO) {
    val bitmap = try {
        imageProxy.toBitmap().rotate(imageProxy.imageInfo.rotationDegrees)
    } finally {
        imageProxy.close()
    }
    // The bitmap is already upright, hence no extra rotation for the segmentation.
    val segmentation = imageSegmentationService.runSegmentationAndReturn(bitmap, 0)
        ?: return@withContext null
    val mask = segmentation.segmentation.toBinaryMask()
    val quad = detectDocumentQuad(mask) ?: return@withContext null
    val resizedQuad = quad.scaledTo(mask.width, mask.height, bitmap.width, bitmap.height)
    extractDocument(bitmap, resizedQuad)
}
/**
 * Returns this bitmap rotated by [degrees].
 *
 * A rotation of 0 returns the receiver itself; any other value builds a bitmap from the
 * full source area through a rotation matrix, with filtering enabled.
 */
fun Bitmap.rotate(degrees: Int): Bitmap =
    if (degrees == 0) {
        this
    } else {
        val rotation = Matrix()
        rotation.postRotate(degrees.toFloat())
        Bitmap.createBitmap(this, 0, 0, width, height, rotation, true)
    }
}

View File

@@ -0,0 +1,11 @@
package org.mydomain.myscan
import android.graphics.Bitmap
/** Closed set of screens the app can display. */
sealed class Screen {

    /** The camera screen. */
    object Camera : Screen()

    /**
     * Preview of a captured page.
     *
     * @property image the page image to display, or `null` when there is none
     * @property isProcessing whether the page is still being processed
     */
    data class PagePreview(
        val image: Bitmap? = null,
        val isProcessing: Boolean = true,
    ) : Screen()
}

View File

@@ -1,7 +1,9 @@
package org.mydomain.myscan.view
import android.content.Context
import android.content.pm.PackageManager.PERMISSION_GRANTED
import android.graphics.Bitmap
import android.util.Log
import android.view.ViewGroup.LayoutParams.MATCH_PARENT
import android.widget.LinearLayout
import android.widget.Toast
@@ -10,21 +12,27 @@ import androidx.activity.result.contract.ActivityResultContracts
import androidx.camera.core.AspectRatio.RATIO_4_3
import androidx.camera.core.CameraSelector
import androidx.camera.core.ImageAnalysis
import androidx.camera.core.ImageCapture
import androidx.camera.core.ImageCaptureException
import androidx.camera.core.ImageProxy
import androidx.camera.core.Preview
import androidx.camera.lifecycle.ProcessCameraProvider
import androidx.camera.view.PreviewView
import androidx.compose.foundation.Canvas
import androidx.compose.foundation.layout.Box
import androidx.compose.foundation.layout.Column
import androidx.compose.foundation.layout.fillMaxSize
import androidx.compose.foundation.layout.height
import androidx.compose.foundation.layout.width
import androidx.compose.material3.Button
import androidx.compose.material3.Text
import androidx.compose.runtime.Composable
import androidx.compose.runtime.DisposableEffect
import androidx.compose.runtime.LaunchedEffect
import androidx.compose.runtime.getValue
import androidx.compose.runtime.mutableStateOf
import androidx.compose.runtime.remember
import androidx.compose.ui.Alignment
import androidx.compose.ui.Modifier
import androidx.compose.ui.geometry.Offset
import androidx.compose.ui.graphics.BlendMode
@@ -37,18 +45,20 @@ import androidx.compose.ui.platform.LocalContext
import androidx.compose.ui.unit.dp
import androidx.compose.ui.viewinterop.AndroidView
import androidx.core.content.ContextCompat
import androidx.core.graphics.scale
import androidx.lifecycle.LifecycleOwner
import androidx.lifecycle.compose.LocalLifecycleOwner
import com.google.common.util.concurrent.ListenableFuture
import org.mydomain.myscan.MainViewModel
import org.mydomain.myscan.Point
import org.mydomain.myscan.UiState
import org.mydomain.myscan.scaledTo
import java.util.concurrent.ExecutorService
import java.util.concurrent.Executors
import androidx.core.graphics.scale
import org.mydomain.myscan.Point
import org.mydomain.myscan.scaledTo
@Composable
fun CameraScreen(
viewModel: MainViewModel,
uiState: UiState,
onImageAnalyzed: (ImageProxy) -> Unit,
) {
@@ -62,6 +72,8 @@ fun CameraScreen(
}
}
val captureController = remember { CameraCaptureController() }
LaunchedEffect(Unit) {
val camera = android.Manifest.permission.CAMERA
if (ContextCompat.checkSelfPermission(context, camera) != PERMISSION_GRANTED) {
@@ -69,6 +81,7 @@ fun CameraScreen(
}
}
Column {
val width = LocalConfiguration.current.screenWidthDp
val height = width / 3 * 4
Box(
@@ -76,15 +89,35 @@ fun CameraScreen(
.width(width.dp)
.height(height.dp)
) {
CameraPreview(onImageAnalyzed = onImageAnalyzed)
CameraPreview(
onImageAnalyzed = onImageAnalyzed,
captureController = captureController)
AnalysisOverlay(uiState)
}
Button(
onClick = {
captureController.takePicture(
context = context,
onImageCaptured = { imageProxy ->
if (imageProxy != null) {
viewModel.processCapturedImageAndNavigate(imageProxy)
} else {
Log.e("MyScan", "Error during image capture")
}
}
)},
modifier = Modifier.align(Alignment.CenterHorizontally),
) {
Text("Capture")
}
}
}
@Composable
fun CameraPreview(
modifier: Modifier = Modifier,
onImageAnalyzed: (ImageProxy) -> Unit,
captureController: CameraCaptureController,
) {
val context = LocalContext.current
val lifecycleOwner = LocalLifecycleOwner.current
@@ -110,7 +143,8 @@ fun CameraPreview(
cameraProviderFuture = cameraProviderFuture,
executor = executor,
previewView = previewView,
onImageAnalyzed = onImageAnalyzed
onImageAnalyzed = onImageAnalyzed,
captureController = captureController
)
}, ContextCompat.getMainExecutor(context))
@@ -124,6 +158,7 @@ fun bindCameraUseCases(
executor: ExecutorService,
previewView: PreviewView,
onImageAnalyzed: (ImageProxy) -> Unit,
captureController: CameraCaptureController,
) {
val preview: Preview = Preview.Builder().setTargetAspectRatio(RATIO_4_3).build()
@@ -136,8 +171,14 @@ fun bindCameraUseCases(
.setOutputImageFormat(ImageAnalysis.OUTPUT_IMAGE_FORMAT_RGBA_8888).build()
imageAnalysis.setAnalyzer(executor, onImageAnalyzed)
val imageCapture = ImageCapture.Builder()
.setTargetAspectRatio(RATIO_4_3)
.setCaptureMode(ImageCapture.CAPTURE_MODE_MINIMIZE_LATENCY)
.build()
captureController.imageCapture = imageCapture
val cameraProvider = cameraProviderFuture.get()
cameraProvider.bindToLifecycle(lifecycleOwner, cameraSelector, imageAnalysis, preview)
cameraProvider.bindToLifecycle(lifecycleOwner, cameraSelector, imageAnalysis, preview, imageCapture)
}
@Composable
@@ -187,3 +228,23 @@ fun replaceColor(bitmap: Bitmap, toReplace: Color, replacement: Color): Bitmap {
}
fun Point.toOffset() = Offset(x.toFloat(), y.toFloat())
/**
 * Small holder that lets UI code trigger a still capture on the [ImageCapture] use case
 * assigned to [imageCapture].
 */
class CameraCaptureController {
    /** Use case to capture with; `null` until one has been assigned. */
    var imageCapture: ImageCapture? = null

    /**
     * Requests a still capture and reports the outcome through [onImageCaptured].
     *
     * [onImageCaptured] is always invoked exactly once: with the captured image on
     * success, or with `null` when the capture fails or no [imageCapture] is set.
     * NOTE(review): per the CameraX documentation the receiver of a non-null image is
     * responsible for closing it; verify the callers do.
     *
     * @param context used to obtain the main-thread executor the callback runs on
     * @param onImageCaptured receives the captured image, or `null` on failure
     */
    fun takePicture(context: Context, onImageCaptured: (ImageProxy?) -> Unit) {
        val capture = imageCapture
        if (capture == null) {
            // Previously a silent no-op: the caller never learned that nothing was captured.
            Log.e("CameraCapture", "Image capture failed: no ImageCapture use case is available")
            onImageCaptured(null)
            return
        }
        capture.takePicture(
            // TODO is it a good idea to use this executor?
            ContextCompat.getMainExecutor(context),
            object : ImageCapture.OnImageCapturedCallback() {
                override fun onCaptureSuccess(imageProxy: ImageProxy) {
                    onImageCaptured(imageProxy)
                }

                override fun onError(exception: ImageCaptureException) {
                    Log.e("CameraCapture", "Image capture failed: ${exception.message}", exception)
                    onImageCaptured(null)
                }
            }
        )
    }
}

View File

@@ -0,0 +1,63 @@
package org.mydomain.myscan.view
import android.graphics.Bitmap
import androidx.compose.foundation.Image
import androidx.compose.foundation.layout.Box
import androidx.compose.foundation.layout.fillMaxSize
import androidx.compose.foundation.layout.padding
import androidx.compose.material.icons.Icons
import androidx.compose.material.icons.automirrored.filled.ArrowBack
import androidx.compose.material3.CircularProgressIndicator
import androidx.compose.material3.Icon
import androidx.compose.material3.IconButton
import androidx.compose.material3.Text
import androidx.compose.runtime.Composable
import androidx.compose.ui.Alignment
import androidx.compose.ui.Modifier
import androidx.compose.ui.graphics.asImageBitmap
import androidx.compose.ui.layout.ContentScale
import androidx.compose.ui.unit.dp
/**
 * Full-screen preview of a captured page with a back button in the top-start corner.
 *
 * Shows a progress indicator while [isProcessing] is true; otherwise shows [image] fitted
 * to the screen, or a short message when there is no image.
 *
 * @param image the page to display, or `null` when none is available
 * @param isProcessing whether the page is still being processed
 * @param onBackPressed invoked when the back button is clicked
 */
@Composable
fun PagePreviewScreen(
    image: Bitmap?,
    isProcessing: Boolean,
    onBackPressed: () -> Unit
) {
    Box(modifier = Modifier.fillMaxSize()) {
        val centered = Modifier.align(Alignment.Center)

        when {
            isProcessing -> CircularProgressIndicator(modifier = centered)

            image == null -> Text(
                text = "No image is available.",
                modifier = centered,
            )

            else -> Image(
                bitmap = image.asImageBitmap(),
                contentDescription = "Document preview",
                modifier = Modifier.fillMaxSize(),
                contentScale = ContentScale.Fit,
            )
        }

        // Declared last so it is drawn on top of the content above.
        IconButton(
            onClick = onBackPressed,
            modifier = Modifier
                .align(Alignment.TopStart)
                .padding(16.dp),
        ) {
            Icon(
                imageVector = Icons.AutoMirrored.Filled.ArrowBack,
                contentDescription = "Back",
            )
        }
    }
}