diff --git a/app/src/main/java/org/mydomain/myscan/DocumentDetection.kt b/app/src/main/java/org/mydomain/myscan/DocumentDetection.kt
index 13e7c6c..5cfae29 100644
--- a/app/src/main/java/org/mydomain/myscan/DocumentDetection.kt
+++ b/app/src/main/java/org/mydomain/myscan/DocumentDetection.kt
@@ -8,6 +8,9 @@ import org.opencv.core.MatOfPoint2f
 import org.opencv.core.Size
 import org.opencv.imgproc.Imgproc
 import kotlin.math.abs
+import kotlin.math.max
+import kotlin.math.hypot
+import androidx.core.graphics.createBitmap
 
 fun detectDocumentQuad(mask: Bitmap): Quad? {
     val mat = Mat()
@@ -47,3 +50,65 @@ fun detectDocumentQuad(mask: Bitmap): Quad? {
     val vertices = biggest?.toList()?.map { Point(it.x.toInt(), it.y.toInt()) }
     return createQuad(vertices)
 }
+
+/**
+ * Crops the document delimited by [quad] out of [originalBitmap] and straightens it with a
+ * perspective warp.
+ *
+ * The output size is the longer of each pair of opposite quad edges, clamped to at least one
+ * pixel so that a degenerate quad cannot request an invalid zero-sized bitmap.
+ *
+ * @param originalBitmap image the quad coordinates refer to.
+ * @param quad corners of the document, expressed in [originalBitmap] pixel coordinates.
+ * @return a new bitmap containing only the rectified document.
+ */
+fun extractDocument(originalBitmap: Bitmap, quad: Quad): Bitmap {
+    val widthTop = norm(quad.topLeft, quad.topRight)
+    val widthBottom = norm(quad.bottomLeft, quad.bottomRight)
+    val maxWidth = max(widthTop, widthBottom).toInt().coerceAtLeast(1)
+
+    val heightLeft = norm(quad.topLeft, quad.bottomLeft)
+    val heightRight = norm(quad.topRight, quad.bottomRight)
+    val maxHeight = max(heightLeft, heightRight).toInt().coerceAtLeast(1)
+
+    val srcPoints = MatOfPoint2f(
+        quad.topLeft.toCv(),
+        quad.topRight.toCv(),
+        quad.bottomRight.toCv(),
+        quad.bottomLeft.toCv(),
+    )
+    val dstPoints = MatOfPoint2f(
+        org.opencv.core.Point(0.0, 0.0),
+        org.opencv.core.Point(maxWidth.toDouble(), 0.0),
+        org.opencv.core.Point(maxWidth.toDouble(), maxHeight.toDouble()),
+        org.opencv.core.Point(0.0, maxHeight.toDouble()),
+    )
+    val inputMat = Mat()
+    val outputMat = Mat()
+    var transform: Mat? = null
+    try {
+        transform = Imgproc.getPerspectiveTransform(srcPoints, dstPoints)
+        Utils.bitmapToMat(originalBitmap, inputMat)
+        Imgproc.warpPerspective(inputMat, outputMat, transform, Size(maxWidth.toDouble(), maxHeight.toDouble()))
+
+        val outputBitmap = createBitmap(maxWidth, maxHeight)
+        Utils.matToBitmap(outputMat, outputBitmap)
+        return outputBitmap
+    } finally {
+        // Mats wrap native buffers that the JVM garbage collector does not account for; release
+        // them eagerly so repeated captures do not pile up native memory.
+        listOf(srcPoints, dstPoints, transform, inputMat, outputMat).forEach { it?.release() }
+    }
+}
+
+/** Converts this project [Point] to its OpenCV counterpart. */
+fun Point.toCv(): org.opencv.core.Point {
+    return org.opencv.core.Point(x.toDouble(), y.toDouble())
+}
+
+/**
+ * Euclidean distance between [p1] and [p2], computed with [hypot] on doubles rather than by
+ * squaring the raw coordinate deltas.
+ */
+private fun norm(p1: Point, p2: Point): Double =
+    hypot((p2.x - p1.x).toDouble(), (p2.y - p1.y).toDouble())
diff --git a/app/src/main/java/org/mydomain/myscan/ImageSegmentationService.kt b/app/src/main/java/org/mydomain/myscan/ImageSegmentationService.kt
index ed0eda6..e96c52a 100644
--- a/app/src/main/java/org/mydomain/myscan/ImageSegmentationService.kt
+++ b/app/src/main/java/org/mydomain/myscan/ImageSegmentationService.kt
@@ -55,30 +55,54 @@ class ImageSegmentationService(private val context: Context) {
         }
     }
 
-    suspend fun runSegmentation(bitmap: Bitmap, rotationDegrees: Int) {
+    /**
+     * Runs one synchronous inference pass of [interpreter] on [bitmap].
+     *
+     * Blocks the calling thread for the whole pre-processing, inference and post-processing
+     * sequence.
+     *
+     * @return the processed segmentation plus the pre-processing and inference time in ms.
+     */
+    private fun runSegmentation(interpreter: Interpreter, bitmap: Bitmap, rotationDegrees: Int): SegmentationResult {
+        val startTime = SystemClock.uptimeMillis()
+
+        val (_, _, h, w) = interpreter.getInputTensor(0).shape()
+        // Preprocess manually into CHW float buffer
+        val inputBuffer = bitmapToCHWFloatBuffer(bitmap, width = w, height = h, rotationDegrees)
+
+        val (_, cOut, hOut, wOut) = interpreter.getOutputTensor(0).shape()
+        val outputBuffer = FloatBuffer.allocate(cOut * hOut * wOut)
+
+        // Run inference
+        outputBuffer.rewind()
+        interpreter.run(inputBuffer, outputBuffer)
+
+        val inferenceTime = SystemClock.uptimeMillis() - startTime
+        val segmentResult = processOutputBuffer(outputBuffer, wOut, hOut, cOut)
+        return SegmentationResult(segmentResult, inferenceTime)
+    }
+
+    /**
+     * Runs a segmentation synchronously and returns its result instead of publishing it.
+     *
+     * @return the segmentation result, or `null` when the interpreter is not available.
+     */
+    fun runSegmentationAndReturn(bitmap: Bitmap, rotationDegrees: Int): SegmentationResult? =
+        interpreter?.let { runSegmentation(it, bitmap, rotationDegrees) }
+
+    /**
+     * Runs a segmentation on [Dispatchers.IO] and publishes the result to the segmentation
+     * state, unless the calling coroutine was cancelled in the meantime.
+     */
+    suspend fun runSegmentationAndEmit(bitmap: Bitmap, rotationDegrees: Int) {
+        // Read the nullable property once: a local stays non-null even if the property is
+        // reassigned while the inference is running, which `interpreter!!` could not guarantee.
+        val activeInterpreter = interpreter ?: return
         try {
             withContext(Dispatchers.IO) {
-                if (interpreter == null) return@withContext
-                val startTime = SystemClock.uptimeMillis()
-
-                val (_, _, h, w) = interpreter?.getInputTensor(0)?.shape() ?: return@withContext
-                val dataType = interpreter?.getInputTensor(0)?.dataType()
-                Log.i(TAG, "segment, input shape: ${interpreter!!.getInputTensor(0).shape().asList()} data type=${dataType}")
-
-                // Preprocess manually into CHW float buffer
-                val inputBuffer = bitmapToCHWFloatBuffer(bitmap, width = w, height = h, rotationDegrees)
-
-                val (_, cOut, hOut, wOut) = interpreter!!.getOutputTensor(0).shape()
-                val outputBuffer = FloatBuffer.allocate(cOut * hOut * wOut)
-
-                // Run inference
-                outputBuffer.rewind()
-                interpreter?.run(inputBuffer, outputBuffer)
-
-                val inferenceTime = SystemClock.uptimeMillis() - startTime
+                val segmentationResult = runSegmentation(activeInterpreter, bitmap, rotationDegrees)
                 if (isActive) {
-                    val segmentResult = processOutputBuffer(outputBuffer, wOut, hOut, cOut)
-                    _segmentation.value = SegmentationResult(segmentResult, inferenceTime)
+                    _segmentation.value = segmentationResult
                 }
             }
         } catch (e: Exception) {
diff --git a/app/src/main/java/org/mydomain/myscan/MainActivity.kt b/app/src/main/java/org/mydomain/myscan/MainActivity.kt
index 43900d7..5d07335 100644
--- a/app/src/main/java/org/mydomain/myscan/MainActivity.kt
+++ b/app/src/main/java/org/mydomain/myscan/MainActivity.kt
@@ -7,13 +7,12 @@ import androidx.activity.compose.setContent
 import androidx.activity.enableEdgeToEdge
 import androidx.activity.viewModels
 import androidx.compose.foundation.background
-import androidx.compose.foundation.layout.Box
 import androidx.compose.foundation.layout.Column
 import androidx.compose.foundation.layout.fillMaxWidth
 import androidx.compose.foundation.layout.padding
 import androidx.compose.material3.Scaffold
 import androidx.compose.material3.Text
 import androidx.compose.runtime.Composable
 import androidx.compose.runtime.getValue
 import androidx.compose.ui.Modifier
 import androidx.compose.ui.graphics.Color
@@ -22,6 +21,7 @@ import androidx.compose.ui.unit.dp
 import androidx.lifecycle.compose.collectAsStateWithLifecycle
 import org.mydomain.myscan.ui.theme.MyScanTheme
 import org.mydomain.myscan.view.CameraScreen
+import org.mydomain.myscan.view.PagePreviewScreen
 import org.opencv.android.OpenCVLoader
 
 class MainActivity : ComponentActivity() {
@@ -32,14 +32,27 @@ class MainActivity : ComponentActivity() {
         val viewModel: MainViewModel by viewModels { MainViewModel.getFactory(this) }
         enableEdgeToEdge()
         setContent {
+            // Collected lifecycle-aware, like uiState below, so observation follows the activity lifecycle.
+            val currentScreen by viewModel.currentScreen.collectAsStateWithLifecycle()
+            // TODO should uiState own currentScreen?
             val uiState by viewModel.uiState.collectAsStateWithLifecycle()
             MyScanTheme {
                 Scaffold { innerPadding ->
                     Column {
                         Greeting(modifier = Modifier.padding(innerPadding))
                         MyMessageBox(uiState.detectionMessage, uiState.inferenceTime)
-                        Box {
-                            CameraScreen(uiState, onImageAnalyzed = { image -> viewModel.segment(image) } )
+                        when (val screen = currentScreen) {
+                            is Screen.Camera -> {
+                                CameraScreen(viewModel, uiState,
+                                    onImageAnalyzed = { image -> viewModel.segment(image) } )
+                            }
+                            is Screen.PagePreview -> {
+                                PagePreviewScreen (
+                                    image = screen.image,
+                                    isProcessing = screen.isProcessing,
+                                    onBackPressed = { viewModel.navigateTo(Screen.Camera) }
+                                )
+                            }
                         }
                     }
                 }
diff --git a/app/src/main/java/org/mydomain/myscan/MainViewModel.kt b/app/src/main/java/org/mydomain/myscan/MainViewModel.kt
index 297d4f7..237bfbf 100644
--- a/app/src/main/java/org/mydomain/myscan/MainViewModel.kt
+++ b/app/src/main/java/org/mydomain/myscan/MainViewModel.kt
@@ -1,17 +1,22 @@
 package org.mydomain.myscan
 
 import android.content.Context
+import android.graphics.Bitmap
+import android.graphics.Matrix
+import android.util.Log
 import androidx.camera.core.ImageProxy
 import androidx.lifecycle.ViewModel
 import androidx.lifecycle.ViewModelProvider
 import androidx.lifecycle.viewModelScope
 import androidx.lifecycle.viewmodel.CreationExtras
+import kotlinx.coroutines.Dispatchers
 import kotlinx.coroutines.flow.MutableStateFlow
 import kotlinx.coroutines.flow.StateFlow
 import kotlinx.coroutines.flow.asStateFlow
 import kotlinx.coroutines.flow.filterNotNull
 import kotlinx.coroutines.flow.map
 import kotlinx.coroutines.launch
+import kotlinx.coroutines.withContext
 
 class MainViewModel(private val imageSegmentationService: ImageSegmentationService): ViewModel() {
 
@@ -26,6 +31,9 @@ class MainViewModel(private val imageSegmentationService: ImageSegmentationServi
     private var _uiState = MutableStateFlow(UiState("just started"))
     val uiState: StateFlow<UiState> = _uiState.asStateFlow()
 
+    private val _currentScreen = MutableStateFlow<Screen>(Screen.Camera)
+    val currentScreen: StateFlow<Screen> = _currentScreen.asStateFlow()
+
     init {
         viewModelScope.launch {
             imageSegmentationService.initialize()
@@ -48,7 +56,7 @@ class MainViewModel(private val imageSegmentationService: ImageSegmentationServi
 
     fun segment(imageProxy: ImageProxy) {
         viewModelScope.launch {
-            imageSegmentationService.runSegmentation(
+            imageSegmentationService.runSegmentationAndEmit(
                 imageProxy.toBitmap(),
                 imageProxy.imageInfo.rotationDegrees,
             )
@@ -56,4 +64,58 @@ class MainViewModel(private val imageSegmentationService: ImageSegmentationServi
         }
     }
 
+    /** Switches the UI to [screen]. */
+    fun navigateTo(screen: Screen) {
+        _currentScreen.value = screen
+    }
+
+    /**
+     * Shows the page preview in its processing state, extracts the document from
+     * [imageProxy] in the background, then shows the result.
+     *
+     * Takes ownership of [imageProxy]: it is closed once its pixels have been copied.
+     */
+    fun processCapturedImageAndNavigate(imageProxy: ImageProxy) {
+        viewModelScope.launch {
+            Log.d("MyScan", "Navigating to spinner")
+            navigateTo(Screen.PagePreview(image = null, isProcessing = true))
+            val processedImage = processCapturedImage(imageProxy)
+            // The user may have gone back to the camera while processing was running; do not
+            // pull them back to the preview in that case.
+            if (_currentScreen.value is Screen.PagePreview) {
+                Log.d("MyScan", "Navigating to result image")
+                navigateTo(Screen.PagePreview(image = processedImage, isProcessing = false))
+            }
+        }
+    }
+
+    /**
+     * Converts the capture to an upright bitmap, segments it and, when a document quad is
+     * found, returns the perspective-corrected document.
+     *
+     * @return the extracted document, or `null` when segmentation is unavailable or no
+     * document quad is detected.
+     */
+    private suspend fun processCapturedImage(imageProxy: ImageProxy): Bitmap? = withContext(Dispatchers.IO) {
+        // CameraX leaves closing the ImageProxy to the receiver of an in-memory capture.
+        // toBitmap() copies the pixels, so the proxy can be closed as soon as the copy exists.
+        val bitmap = try {
+            imageProxy.toBitmap().rotate(imageProxy.imageInfo.rotationDegrees)
+        } finally {
+            imageProxy.close()
+        }
+        val segmentation = imageSegmentationService.runSegmentationAndReturn(bitmap, 0)
+            ?: return@withContext null
+        val mask = segmentation.segmentation.toBinaryMask()
+        val quad = detectDocumentQuad(mask) ?: return@withContext null
+        val resizedQuad = quad.scaledTo(mask.width, mask.height, bitmap.width, bitmap.height)
+        extractDocument(bitmap, resizedQuad)
+    }
+
+    /** Returns this bitmap rotated by [degrees], or the receiver itself when [degrees] is 0. */
+    private fun Bitmap.rotate(degrees: Int): Bitmap {
+        if (degrees == 0) return this
+        val matrix = Matrix().apply { postRotate(degrees.toFloat()) }
+        return Bitmap.createBitmap(this, 0, 0, width, height, matrix, true)
+    }
 }
\ No newline at end of file
diff --git a/app/src/main/java/org/mydomain/myscan/Navigation.kt b/app/src/main/java/org/mydomain/myscan/Navigation.kt
new file mode 100644
index 0000000..c890735
--- /dev/null
+++ b/app/src/main/java/org/mydomain/myscan/Navigation.kt
@@ -0,0 +1,20 @@
+package org.mydomain.myscan
+
+import android.graphics.Bitmap
+
+/** Destinations the single-activity UI can display. */
+sealed class Screen {
+    /** Live camera preview with the capture button. */
+    object Camera : Screen()
+
+    /**
+     * Preview of a captured page.
+     *
+     * @property image extracted document, or `null` while processing or when none was found.
+     * @property isProcessing `true` while the capture is still being processed.
+     */
+    data class PagePreview(
+        val image: Bitmap? = null,
+        val isProcessing: Boolean = true,
+    ) : Screen()
+}
diff --git a/app/src/main/java/org/mydomain/myscan/view/Camera.kt b/app/src/main/java/org/mydomain/myscan/view/Camera.kt
index 4f43480..531ebdf 100644
--- a/app/src/main/java/org/mydomain/myscan/view/Camera.kt
+++ b/app/src/main/java/org/mydomain/myscan/view/Camera.kt
@@ -1,7 +1,9 @@
 package org.mydomain.myscan.view
 
+import android.content.Context
 import android.content.pm.PackageManager.PERMISSION_GRANTED
 import android.graphics.Bitmap
+import android.util.Log
 import android.view.ViewGroup.LayoutParams.MATCH_PARENT
 import android.widget.LinearLayout
 import android.widget.Toast
@@ -10,21 +12,27 @@ import androidx.activity.result.contract.ActivityResultContracts
 import androidx.camera.core.AspectRatio.RATIO_4_3
 import androidx.camera.core.CameraSelector
 import androidx.camera.core.ImageAnalysis
+import androidx.camera.core.ImageCapture
+import androidx.camera.core.ImageCaptureException
 import androidx.camera.core.ImageProxy
 import androidx.camera.core.Preview
 import androidx.camera.lifecycle.ProcessCameraProvider
 import androidx.camera.view.PreviewView
 import androidx.compose.foundation.Canvas
 import androidx.compose.foundation.layout.Box
+import androidx.compose.foundation.layout.Column
 import androidx.compose.foundation.layout.fillMaxSize
 import androidx.compose.foundation.layout.height
 import androidx.compose.foundation.layout.width
+import androidx.compose.material3.Button
+import androidx.compose.material3.Text
 import androidx.compose.runtime.Composable
 import androidx.compose.runtime.DisposableEffect
 import androidx.compose.runtime.LaunchedEffect
 import androidx.compose.runtime.getValue
 import androidx.compose.runtime.mutableStateOf
 import androidx.compose.runtime.remember
+import androidx.compose.ui.Alignment
 import androidx.compose.ui.Modifier
 import androidx.compose.ui.geometry.Offset
 import androidx.compose.ui.graphics.BlendMode
@@ -37,18 +45,20 @@ import androidx.compose.ui.platform.LocalContext
 import androidx.compose.ui.unit.dp
 import androidx.compose.ui.viewinterop.AndroidView
 import androidx.core.content.ContextCompat
+import androidx.core.graphics.scale
 import androidx.lifecycle.LifecycleOwner
 import androidx.lifecycle.compose.LocalLifecycleOwner
 import com.google.common.util.concurrent.ListenableFuture
+import org.mydomain.myscan.MainViewModel
+import org.mydomain.myscan.Point
 import org.mydomain.myscan.UiState
+import org.mydomain.myscan.scaledTo
 import java.util.concurrent.ExecutorService
 import java.util.concurrent.Executors
-import androidx.core.graphics.scale
-import org.mydomain.myscan.Point
-import org.mydomain.myscan.scaledTo
 
 @Composable
 fun CameraScreen(
+    viewModel: MainViewModel,
     uiState: UiState,
     onImageAnalyzed: (ImageProxy) -> Unit,
 ) {
@@ -62,6 +72,8 @@ fun CameraScreen(
         }
     }
 
+    val captureController = remember { CameraCaptureController() }
+
     LaunchedEffect(Unit) {
         val camera = android.Manifest.permission.CAMERA
         if (ContextCompat.checkSelfPermission(context, camera) != PERMISSION_GRANTED) {
@@ -69,15 +81,35 @@ fun CameraScreen(
         }
     }
 
-    val width = LocalConfiguration.current.screenWidthDp
-    val height = width / 3 * 4
-    Box(
-        modifier = Modifier
-            .width(width.dp)
-            .height(height.dp)
-    ) {
-        CameraPreview(onImageAnalyzed = onImageAnalyzed)
-        AnalysisOverlay(uiState)
+    Column {
+        val width = LocalConfiguration.current.screenWidthDp
+        val height = width / 3 * 4
+        Box(
+            modifier = Modifier
+                .width(width.dp)
+                .height(height.dp)
+        ) {
+            CameraPreview(
+                onImageAnalyzed = onImageAnalyzed,
+                captureController = captureController)
+            AnalysisOverlay(uiState)
+        }
+        Button(
+            onClick = {
+                captureController.takePicture(
+                    context = context,
+                    onImageCaptured = { imageProxy ->
+                        if (imageProxy != null) {
+                            viewModel.processCapturedImageAndNavigate(imageProxy)
+                        } else {
+                            Log.e("MyScan", "Error during image capture")
+                        }
+                    }
+                )},
+            modifier = Modifier.align(Alignment.CenterHorizontally),
+        ) {
+            Text("Capture")
+        }
     }
 }
 
@@ -85,6 +117,7 @@ fun CameraScreen(
 fun CameraPreview(
     modifier: Modifier = Modifier,
     onImageAnalyzed: (ImageProxy) -> Unit,
+    captureController: CameraCaptureController,
 ) {
     val context = LocalContext.current
     val lifecycleOwner = LocalLifecycleOwner.current
@@ -110,7 +143,8 @@ fun CameraPreview(
                 cameraProviderFuture = cameraProviderFuture,
                 executor = executor,
                 previewView = previewView,
-                onImageAnalyzed = onImageAnalyzed
+                onImageAnalyzed = onImageAnalyzed,
+                captureController = captureController
             )
         }, ContextCompat.getMainExecutor(context))
 
@@ -124,6 +158,7 @@ fun bindCameraUseCases(
     executor: ExecutorService,
     previewView: PreviewView,
     onImageAnalyzed: (ImageProxy) -> Unit,
+    captureController: CameraCaptureController,
 ) {
     val preview: Preview = Preview.Builder().setTargetAspectRatio(RATIO_4_3).build()
 
@@ -136,8 +171,14 @@ fun bindCameraUseCases(
         .setOutputImageFormat(ImageAnalysis.OUTPUT_IMAGE_FORMAT_RGBA_8888).build()
     imageAnalysis.setAnalyzer(executor, onImageAnalyzed)
 
+    val imageCapture = ImageCapture.Builder()
+        .setTargetAspectRatio(RATIO_4_3)
+        .setCaptureMode(ImageCapture.CAPTURE_MODE_MINIMIZE_LATENCY)
+        .build()
+    captureController.imageCapture = imageCapture
+
     val cameraProvider = cameraProviderFuture.get()
-    cameraProvider.bindToLifecycle(lifecycleOwner, cameraSelector, imageAnalysis, preview)
+    cameraProvider.bindToLifecycle(lifecycleOwner, cameraSelector, imageAnalysis, preview, imageCapture)
 }
 
 @Composable
@@ -187,3 +228,43 @@ fun replaceColor(bitmap: Bitmap, toReplace: Color, replacement: Color): Bitmap {
 }
 
 fun Point.toOffset() = Offset(x.toFloat(), y.toFloat())
+
+/**
+ * Bridges the Compose capture button and the CameraX [ImageCapture] use case, which only
+ * exists once the camera use cases have been bound.
+ */
+class CameraCaptureController {
+    /** Bound capture use case, or `null` until the camera has been set up. */
+    var imageCapture: ImageCapture? = null
+
+    /**
+     * Captures a still image in memory and reports it through [onImageCaptured].
+     *
+     * The callback is always invoked: with the captured image on success, or with `null`
+     * when the capture fails or the camera is not bound yet. The receiver of a non-null
+     * [ImageProxy] is responsible for closing it.
+     */
+    fun takePicture(context: Context, onImageCaptured: (ImageProxy?) -> Unit) {
+        val capture = imageCapture
+        if (capture == null) {
+            // Without this branch a tap before binding would be dropped without any feedback.
+            Log.e("CameraCapture", "Image capture requested before the camera was bound")
+            onImageCaptured(null)
+            return
+        }
+        capture.takePicture(
+            // Results are delivered on the main executor; keep the callback itself lightweight.
+            ContextCompat.getMainExecutor(context),
+            object : ImageCapture.OnImageCapturedCallback() {
+                override fun onCaptureSuccess(imageProxy: ImageProxy) {
+                    onImageCaptured(imageProxy)
+                }
+
+                override fun onError(exception: ImageCaptureException) {
+                    Log.e("CameraCapture", "Image capture failed: ${exception.message}", exception)
+                    onImageCaptured(null)
+                }
+            },
+        )
+    }
+}
diff --git a/app/src/main/java/org/mydomain/myscan/view/PagePreview.kt b/app/src/main/java/org/mydomain/myscan/view/PagePreview.kt
new file mode 100644
index 0000000..c340e3d
--- /dev/null
+++ b/app/src/main/java/org/mydomain/myscan/view/PagePreview.kt
@@ -0,0 +1,77 @@
+package org.mydomain.myscan.view
+
+import android.graphics.Bitmap
+import androidx.activity.compose.BackHandler
+import androidx.compose.foundation.Image
+import androidx.compose.foundation.layout.Box
+import androidx.compose.foundation.layout.fillMaxSize
+import androidx.compose.foundation.layout.padding
+import androidx.compose.material.icons.Icons
+import androidx.compose.material.icons.automirrored.filled.ArrowBack
+import androidx.compose.material3.CircularProgressIndicator
+import androidx.compose.material3.Icon
+import androidx.compose.material3.IconButton
+import androidx.compose.material3.Text
+import androidx.compose.runtime.Composable
+import androidx.compose.ui.Alignment
+import androidx.compose.ui.Modifier
+import androidx.compose.ui.graphics.asImageBitmap
+import androidx.compose.ui.layout.ContentScale
+import androidx.compose.ui.unit.dp
+
+/**
+ * Shows the result of a capture: a spinner while [isProcessing] is `true`, otherwise [image]
+ * when there is one, otherwise a "no image" message. A back arrow is always overlaid in the
+ * top-start corner.
+ *
+ * @param image extracted document to display, or `null` when none is available.
+ * @param isProcessing whether the capture is still being processed.
+ * @param onBackPressed invoked by both the on-screen arrow and the system back action.
+ */
+@Composable
+fun PagePreviewScreen(
+    image: Bitmap?,
+    isProcessing: Boolean,
+    onBackPressed: () -> Unit,
+) {
+    // Route the system back action to the same handler as the arrow; it would otherwise fall
+    // through to the activity's default back handling instead of returning to the camera.
+    BackHandler(onBack = onBackPressed)
+
+    Box(modifier = Modifier.fillMaxSize()) {
+        when {
+            isProcessing -> {
+                CircularProgressIndicator(
+                    modifier = Modifier
+                        .align(Alignment.Center)
+                )
+            }
+            image != null -> {
+                Image(
+                    bitmap = image.asImageBitmap(),
+                    contentDescription = "Document preview",
+                    modifier = Modifier.fillMaxSize(),
+                    contentScale = ContentScale.Fit
+                )
+            }
+            else -> {
+                Text(
+                    text = "No image is available.",
+                    modifier = Modifier.align(Alignment.Center)
+                )
+            }
+        }
+
+        IconButton (
+            onClick = onBackPressed,
+            modifier = Modifier
+                .align(Alignment.TopStart)
+                .padding(16.dp)
+        ) {
+            Icon(
+                imageVector = Icons.AutoMirrored.Filled.ArrowBack,
+                contentDescription = "Back"
+            )
+        }
+    }
+}