New quad algorithm: identify edges from contour orientation (#130)

Goal: improve precision of automatic document cropping by switching:
- from Douglas-Peucker algorithm (OpenCV's approxPolyDP) + a heuristic for documents missing a corner
- to an algorithm that looks for edges

* New quad algorithm: identify edges from contour orientation
* Performance optimization: reduce number of calls to trigonometric functions
* Performance: use a single threshold for live analysis
* Fix orientation of debug mask and compute it only if required
* Exclude quads that go out of the frame
This commit is contained in:
Pierre-Yves Nicolas
2026-03-07 12:09:41 +01:00
committed by GitHub
parent cf196576fe
commit 343495dafe
14 changed files with 488 additions and 316 deletions

View File

@@ -29,6 +29,7 @@ import kotlinx.coroutines.sync.Mutex
import kotlinx.coroutines.sync.withLock
import kotlinx.coroutines.withContext
import org.fairscan.app.data.Logger
import org.fairscan.imageprocessing.ImageSize
import org.fairscan.imageprocessing.Mask
import org.opencv.core.CvType
import org.opencv.core.Mat
@@ -39,7 +40,6 @@ import org.tensorflow.lite.support.common.ops.NormalizeOp
import org.tensorflow.lite.support.image.ImageProcessor
import org.tensorflow.lite.support.image.TensorImage
import org.tensorflow.lite.support.image.ops.ResizeOp
import org.tensorflow.lite.support.image.ops.Rot90Op
import java.nio.ByteBuffer
import java.nio.ByteOrder
@@ -73,13 +73,11 @@ class ImageSegmentationService(private val context: Context, private val logger:
private fun runSegmentation(interpreter: Interpreter, bitmap: Bitmap, rotationDegrees: Int): SegmentationResult {
val startTime = SystemClock.uptimeMillis()
val rotation = -rotationDegrees / 90
val (_, h, w, _) = interpreter.getOutputTensor(0).shape()
val imageProcessor =
ImageProcessor
.Builder()
.add(ResizeOp(h, w, ResizeOp.ResizeMethod.BILINEAR))
.add(Rot90Op(rotation))
.add(NormalizeOp(127.5f, 127.5f)) // TODO check if it's correct
.build()
val tensorImage = TensorImage(DataType.FLOAT32)
@@ -88,7 +86,11 @@ class ImageSegmentationService(private val context: Context, private val logger:
val segmentResult = segment(interpreter, processedImage)
val inferenceTime = SystemClock.uptimeMillis() - startTime
return SegmentationResult(segmentResult, inferenceTime)
return SegmentationResult(
segmentResult,
ImageSize(bitmap.width, bitmap.height),
rotationDegrees,
inferenceTime)
}
suspend fun runSegmentationAndReturn(bitmap: Bitmap, rotationDegrees: Int): SegmentationResult? {
@@ -163,10 +165,14 @@ class ImageSegmentationService(private val context: Context, private val logger:
mask.put(0, 0, data)
return mask
}
/** Returns an [ImageSize] built from this object's `width` and `height`. */
fun maskSize(): ImageSize {
    return ImageSize(width, height)
}
}
/**
 * Outcome of one segmentation run, as assembled by `runSegmentation`.
 *
 * @property segmentation the segmentation computed for the processed image
 * @property originalSize width and height of the bitmap that was handed in for segmentation
 * @property rotationDegrees the rotation value the caller supplied together with the bitmap
 * @property inferenceTime elapsed time of the run, measured as a difference of
 *   `SystemClock.uptimeMillis()` readings (milliseconds)
 */
data class SegmentationResult(
val segmentation: Segmentation,
val originalSize: ImageSize,
val rotationDegrees: Int,
val inferenceTime: Long
)
}

View File

@@ -211,7 +211,7 @@ fun bindCameraUseCases(
@Composable
fun AnalysisOverlay(liveAnalysisState: LiveAnalysisState, debugMode: Boolean) {
val binaryMask = liveAnalysisState.binaryMask ?: return
val maskSize = liveAnalysisState.maskSize ?: return
val targetQuad = liveAnalysisState.stableQuad
var displayedQuad by remember { mutableStateOf<Quad?>(null) }
val quadColor = MaterialTheme.colorScheme.primary
@@ -233,14 +233,15 @@ fun AnalysisOverlay(liveAnalysisState: LiveAnalysisState, debugMode: Boolean) {
Canvas(modifier = Modifier.fillMaxSize()) {
if (debugMode) {
drawMask(this, binaryMask)
val binaryMask = liveAnalysisState.binaryMaskProvider.invoke()
binaryMask?.let { drawMask(this, it) }
}
displayedQuad?.let { quad ->
val scaledQuad = quad.scaledTo(
fromWidth = binaryMask.width,
fromHeight = binaryMask.height,
toWidth = size.width.toInt(),
toHeight = size.height.toInt()
fromWidth = maskSize.width,
fromHeight = maskSize.height,
toWidth = size.width.toDouble(),
toHeight = size.height.toDouble()
)
scaledQuad.edges().forEach {
drawLine(quadColor, it.from.toOffset(), it.to.toOffset(), 10.0f)

View File

@@ -16,12 +16,14 @@ package org.fairscan.app.ui.screens.camera
import android.graphics.Bitmap
import androidx.compose.runtime.Immutable
import org.fairscan.imageprocessing.ImageSize
import org.fairscan.imageprocessing.Quad
/**
 * Snapshot of the live camera analysis that the camera view model publishes and the
 * analysis overlay reads. Every property has a default, so an empty state can be built
 * with no arguments.
 *
 * @property inferenceTime inference duration copied from the segmentation result
 * @property binaryMask binary mask bitmap computed up front.
 *   NOTE(review): appears to be superseded by [binaryMaskProvider] — confirm whether this
 *   property is still needed.
 * @property maskSize size of the segmentation mask; the overlay uses it as the source
 *   size when scaling the quad to the canvas
 * @property binaryMaskProvider builds the binary mask bitmap on demand, so the bitmap is
 *   only created when a consumer invokes it; the default provider returns `null`
 * @property documentQuad quad detected on the latest segmentation, before stabilization
 * @property stableQuad result of the quad stabilizer for the latest detection; the overlay
 *   uses it as its target quad
 */
@Immutable
data class LiveAnalysisState(
val inferenceTime: Long = 0L,
val binaryMask: Bitmap? = null,
val maskSize: ImageSize? = null,
val binaryMaskProvider: () -> Bitmap? = { -> null },
val documentQuad: Quad? = null,
val stableQuad: Quad? = null,
)

View File

@@ -15,6 +15,7 @@
package org.fairscan.app.ui.screens.camera
import android.graphics.Bitmap
import android.graphics.Matrix
import androidx.camera.core.ImageProxy
import androidx.core.graphics.createBitmap
import androidx.lifecycle.ViewModel
@@ -72,16 +73,29 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
imageSegmentationService.segmentation
.filterNotNull()
.collect { result ->
// TODO Should we really call toBinaryMask if it's used only in debug mode?
val binaryMask = result.segmentation.toBinaryMask()
val binaryMaskProvider = { ->
var binaryMask: Bitmap = result.segmentation.toBinaryMask()
if (result.rotationDegrees != 0) {
binaryMask = rotateBitmap(binaryMask, result.rotationDegrees.toFloat())
}
binaryMask
}
val rawQuad = detectDocumentQuad(
result.segmentation,
result.originalSize,
isLiveAnalysis = true
)?.rotate90(
result.rotationDegrees / 90,
result.segmentation.width,
result.segmentation.height
)
val stableQuad = quadStabilizer.update(rawQuad)
_liveAnalysisState.value = LiveAnalysisState(
inferenceTime = result.inferenceTime,
binaryMask = binaryMask,
binaryMaskProvider = binaryMaskProvider,
maskSize = result.segmentation.maskSize(),
documentQuad = rawQuad,
stableQuad = stableQuad,
)
@@ -145,13 +159,13 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
private suspend fun processCapturedImage(
source: Bitmap,
rotationDegrees: Int
rotationDegrees: Int,
): CapturedPage? = withContext(Dispatchers.IO) {
var result: CapturedPage? = null
val segmentation = imageSegmentationService.runSegmentationAndReturn(source, 0)
if (segmentation != null) {
val mask = segmentation.segmentation
val quad = detectDocumentQuad(mask, isLiveAnalysis = false)
val quad = detectDocumentQuad(mask, segmentation.originalSize, isLiveAnalysis = false)
if (quad != null) {
val resizedQuad = quad.scaledTo(mask.width, mask.height, source.width, source.height)
result = extractDocumentFromBitmap(source, resizedQuad, rotationDegrees, mask)
@@ -230,3 +244,9 @@ fun toBitmap(bgr: Mat): Bitmap {
rgba.release()
return bmp
}
/**
 * Produces a bitmap holding the full content of [source] transformed by a rotation of [angle].
 *
 * The rotation is applied through a [Matrix] passed to `Bitmap.createBitmap`, covering the
 * whole source area (origin `0, 0`, full width and height) with filtering enabled.
 *
 * @param source bitmap to rotate
 * @param angle rotation handed to `Matrix.postRotate`
 * @return the bitmap returned by `Bitmap.createBitmap` for the rotated content
 */
fun rotateBitmap(source: Bitmap, angle: Float): Bitmap {
    val rotation = Matrix().apply { postRotate(angle) }
    return Bitmap.createBitmap(
        source,
        0,
        0,
        source.width,
        source.height,
        rotation,
        true,
    )
}