diff --git a/app/src/main/java/org/fairscan/app/domain/ImageSegmentation.kt b/app/src/main/java/org/fairscan/app/domain/ImageSegmentation.kt index 15f855e..63bc5b5 100644 --- a/app/src/main/java/org/fairscan/app/domain/ImageSegmentation.kt +++ b/app/src/main/java/org/fairscan/app/domain/ImageSegmentation.kt @@ -29,6 +29,7 @@ import kotlinx.coroutines.sync.Mutex import kotlinx.coroutines.sync.withLock import kotlinx.coroutines.withContext import org.fairscan.app.data.Logger +import org.fairscan.imageprocessing.ImageSize import org.fairscan.imageprocessing.Mask import org.opencv.core.CvType import org.opencv.core.Mat @@ -39,7 +40,6 @@ import org.tensorflow.lite.support.common.ops.NormalizeOp import org.tensorflow.lite.support.image.ImageProcessor import org.tensorflow.lite.support.image.TensorImage import org.tensorflow.lite.support.image.ops.ResizeOp -import org.tensorflow.lite.support.image.ops.Rot90Op import java.nio.ByteBuffer import java.nio.ByteOrder @@ -73,13 +73,11 @@ class ImageSegmentationService(private val context: Context, private val logger: private fun runSegmentation(interpreter: Interpreter, bitmap: Bitmap, rotationDegrees: Int): SegmentationResult { val startTime = SystemClock.uptimeMillis() - val rotation = -rotationDegrees / 90 val (_, h, w, _) = interpreter.getOutputTensor(0).shape() val imageProcessor = ImageProcessor .Builder() .add(ResizeOp(h, w, ResizeOp.ResizeMethod.BILINEAR)) - .add(Rot90Op(rotation)) .add(NormalizeOp(127.5f, 127.5f)) // TODO check if it's correct .build() val tensorImage = TensorImage(DataType.FLOAT32) @@ -88,7 +86,11 @@ class ImageSegmentationService(private val context: Context, private val logger: val segmentResult = segment(interpreter, processedImage) val inferenceTime = SystemClock.uptimeMillis() - startTime - return SegmentationResult(segmentResult, inferenceTime) + return SegmentationResult( + segmentResult, + ImageSize(bitmap.width, bitmap.height), + rotationDegrees, + inferenceTime) } suspend fun 
runSegmentationAndReturn(bitmap: Bitmap, rotationDegrees: Int): SegmentationResult? { @@ -163,10 +165,14 @@ class ImageSegmentationService(private val context: Context, private val logger: mask.put(0, 0, data) return mask } + + fun maskSize() = ImageSize(width, height) } data class SegmentationResult( val segmentation: Segmentation, + val originalSize: ImageSize, + val rotationDegrees: Int, val inferenceTime: Long ) } diff --git a/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraPreview.kt b/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraPreview.kt index 040aa2e..c230d8d 100644 --- a/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraPreview.kt +++ b/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraPreview.kt @@ -211,7 +211,7 @@ fun bindCameraUseCases( @Composable fun AnalysisOverlay(liveAnalysisState: LiveAnalysisState, debugMode: Boolean) { - val binaryMask = liveAnalysisState.binaryMask ?: return + val maskSize = liveAnalysisState.maskSize ?: return val targetQuad = liveAnalysisState.stableQuad var displayedQuad by remember { mutableStateOf(null) } val quadColor = MaterialTheme.colorScheme.primary @@ -233,14 +233,15 @@ fun AnalysisOverlay(liveAnalysisState: LiveAnalysisState, debugMode: Boolean) { Canvas(modifier = Modifier.fillMaxSize()) { if (debugMode) { - drawMask(this, binaryMask) + val binaryMask = liveAnalysisState.binaryMaskProvider.invoke() + binaryMask?.let { drawMask(this, it) } } displayedQuad?.let { quad -> val scaledQuad = quad.scaledTo( - fromWidth = binaryMask.width, - fromHeight = binaryMask.height, - toWidth = size.width.toInt(), - toHeight = size.height.toInt() + fromWidth = maskSize.width, + fromHeight = maskSize.height, + toWidth = size.width.toDouble(), + toHeight = size.height.toDouble() ) scaledQuad.edges().forEach { drawLine(quadColor, it.from.toOffset(), it.to.toOffset(), 10.0f) diff --git a/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraUiState.kt 
b/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraUiState.kt index dacaa2a..0dd9c25 100644 --- a/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraUiState.kt +++ b/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraUiState.kt @@ -16,12 +16,14 @@ package org.fairscan.app.ui.screens.camera import android.graphics.Bitmap import androidx.compose.runtime.Immutable +import org.fairscan.imageprocessing.ImageSize import org.fairscan.imageprocessing.Quad @Immutable data class LiveAnalysisState( val inferenceTime: Long = 0L, - val binaryMask: Bitmap? = null, + val maskSize: ImageSize? = null, + val binaryMaskProvider: () -> Bitmap? = { -> null }, val documentQuad: Quad? = null, val stableQuad: Quad? = null, ) diff --git a/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraViewModel.kt b/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraViewModel.kt index 1a42fc9..b1768a0 100644 --- a/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraViewModel.kt +++ b/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraViewModel.kt @@ -15,6 +15,7 @@ package org.fairscan.app.ui.screens.camera import android.graphics.Bitmap +import android.graphics.Matrix import androidx.camera.core.ImageProxy import androidx.core.graphics.createBitmap import androidx.lifecycle.ViewModel @@ -72,16 +73,29 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() { imageSegmentationService.segmentation .filterNotNull() .collect { result -> - // TODO Should we really call toBinaryMask if it's used only in debug mode? 
- val binaryMask = result.segmentation.toBinaryMask() + val binaryMaskProvider = { -> + var binaryMask: Bitmap = result.segmentation.toBinaryMask() + if (result.rotationDegrees != 0) { + binaryMask = rotateBitmap(binaryMask, result.rotationDegrees.toFloat()) + } + binaryMask + } + val rawQuad = detectDocumentQuad( result.segmentation, + result.originalSize, isLiveAnalysis = true + )?.rotate90( + result.rotationDegrees / 90, + result.segmentation.width, + result.segmentation.height ) + val stableQuad = quadStabilizer.update(rawQuad) _liveAnalysisState.value = LiveAnalysisState( inferenceTime = result.inferenceTime, - binaryMask = binaryMask, + binaryMaskProvider = binaryMaskProvider, + maskSize = result.segmentation.maskSize(), documentQuad = rawQuad, stableQuad = stableQuad, ) @@ -145,13 +159,13 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() { private suspend fun processCapturedImage( source: Bitmap, - rotationDegrees: Int + rotationDegrees: Int, ): CapturedPage? = withContext(Dispatchers.IO) { var result: CapturedPage? 
= null val segmentation = imageSegmentationService.runSegmentationAndReturn(source, 0) if (segmentation != null) { val mask = segmentation.segmentation - val quad = detectDocumentQuad(mask, isLiveAnalysis = false) + val quad = detectDocumentQuad(mask, segmentation.originalSize, isLiveAnalysis = false) if (quad != null) { val resizedQuad = quad.scaledTo(mask.width, mask.height, source.width, source.height) result = extractDocumentFromBitmap(source, resizedQuad, rotationDegrees, mask) @@ -230,3 +244,9 @@ fun toBitmap(bgr: Mat): Bitmap { rgba.release() return bmp } + +fun rotateBitmap(source: Bitmap, angle: Float): Bitmap { + val matrix = Matrix() + matrix.postRotate(angle) + return Bitmap.createBitmap(source, 0, 0, source.getWidth(), source.getHeight(), matrix, true) +} diff --git a/evaluation/src/main/java/org/fairscan/evaluation/ColorDetectionEvaluator.kt b/evaluation/src/main/java/org/fairscan/evaluation/ColorDetectionEvaluator.kt index 3efd57f..e6eb5d1 100644 --- a/evaluation/src/main/java/org/fairscan/evaluation/ColorDetectionEvaluator.kt +++ b/evaluation/src/main/java/org/fairscan/evaluation/ColorDetectionEvaluator.kt @@ -18,6 +18,7 @@ import org.fairscan.imageprocessing.detectDocumentQuad import org.fairscan.imageprocessing.extractDocument import org.fairscan.imageprocessing.isColoredDocument import org.fairscan.imageprocessing.scaledTo +import org.fairscan.imageprocessing.toImageSize import org.opencv.imgcodecs.Imgcodecs import java.io.File @@ -57,7 +58,7 @@ object ColorDetectionEvaluator { val mask = MatMask(maskMat) - val quad = detectDocumentQuad(mask, isLiveAnalysis = false) + val quad = detectDocumentQuad(mask, mat.size().toImageSize(), isLiveAnalysis = false) ?.scaledTo(mask.width, mask.height, mat.width(), mat.height()) if (quad == null) continue diff --git a/evaluation/src/main/java/org/fairscan/evaluation/DatasetEvaluator.kt b/evaluation/src/main/java/org/fairscan/evaluation/DatasetEvaluator.kt index a93c407..ceb91a0 100644 --- 
a/evaluation/src/main/java/org/fairscan/evaluation/DatasetEvaluator.kt +++ b/evaluation/src/main/java/org/fairscan/evaluation/DatasetEvaluator.kt @@ -19,6 +19,7 @@ import org.fairscan.imageprocessing.detectDocumentQuad import org.fairscan.imageprocessing.extractDocument import org.fairscan.imageprocessing.isColoredDocument import org.fairscan.imageprocessing.scaledTo +import org.fairscan.imageprocessing.toImageSize import org.opencv.core.Mat import org.opencv.imgcodecs.Imgcodecs import java.io.File @@ -68,7 +69,8 @@ object DatasetEvaluator { val mask = MatMask(maskMat) - val quad = detectDocumentQuad(mask, isLiveAnalysis = false) + val originalSize = inputMat.size().toImageSize() + val quad = detectDocumentQuad(mask, originalSize, isLiveAnalysis = false) ?.scaledTo(mask.width, mask.height, inputMat.width(), inputMat.height()) val corrected: Mat? = if (quad != null) { diff --git a/evaluation/src/main/java/org/fairscan/evaluation/ExportQualityEvaluator.kt b/evaluation/src/main/java/org/fairscan/evaluation/ExportQualityEvaluator.kt index d065279..8815156 100644 --- a/evaluation/src/main/java/org/fairscan/evaluation/ExportQualityEvaluator.kt +++ b/evaluation/src/main/java/org/fairscan/evaluation/ExportQualityEvaluator.kt @@ -18,6 +18,7 @@ import org.fairscan.imageprocessing.detectDocumentQuad import org.fairscan.imageprocessing.extractDocument import org.fairscan.imageprocessing.isColoredDocument import org.fairscan.imageprocessing.scaledTo +import org.fairscan.imageprocessing.toImageSize import org.opencv.core.MatOfInt import org.opencv.imgcodecs.Imgcodecs import java.io.File @@ -56,7 +57,8 @@ object ExportQualityEvaluator { val mask = MatMask(maskMat) - val quad = detectDocumentQuad(mask, isLiveAnalysis = false) + val originalSize = sourceMat.size().toImageSize() + val quad = detectDocumentQuad(mask, originalSize, isLiveAnalysis = false) ?.scaledTo(mask.width, mask.height, sourceMat.width(), sourceMat.height()) if (quad == null) { println("Failed to detect quad for 
$imgName") diff --git a/evaluation/src/main/java/org/fairscan/evaluation/QuadDetectionEvaluator.kt b/evaluation/src/main/java/org/fairscan/evaluation/QuadDetectionEvaluator.kt index 1a7ff18..7d0ec16 100644 --- a/evaluation/src/main/java/org/fairscan/evaluation/QuadDetectionEvaluator.kt +++ b/evaluation/src/main/java/org/fairscan/evaluation/QuadDetectionEvaluator.kt @@ -18,6 +18,7 @@ import nu.pattern.OpenCV import org.fairscan.imageprocessing.detectDocumentQuad import org.fairscan.imageprocessing.scaledTo import org.fairscan.imageprocessing.toCv +import org.fairscan.imageprocessing.toImageSize import org.opencv.core.Core import org.opencv.core.Mat import org.opencv.core.Scalar @@ -63,7 +64,8 @@ object QuadDetectionEvaluator { val mask = MatMask(maskMat) - val quad = detectDocumentQuad(mask, isLiveAnalysis = false) + val originalSize = inputMat.size().toImageSize() + val quad = detectDocumentQuad(mask, originalSize, isLiveAnalysis = false) ?.scaledTo(mask.width, mask.height, inputMat.width(), inputMat.height()) val inputOut = File(outputDir, "${e.name}_input.jpg") diff --git a/imageprocessing/src/main/java/org/fairscan/imageprocessing/DocumentDetection.kt b/imageprocessing/src/main/java/org/fairscan/imageprocessing/DocumentDetection.kt index 44863e5..f09c5fd 100644 --- a/imageprocessing/src/main/java/org/fairscan/imageprocessing/DocumentDetection.kt +++ b/imageprocessing/src/main/java/org/fairscan/imageprocessing/DocumentDetection.kt @@ -14,10 +14,11 @@ */ package org.fairscan.imageprocessing -import org.fairscan.imageprocessing.quad.detectDocumentQuadFromProbmap -import org.fairscan.imageprocessing.quad.findQuadFromRightAngles +import org.fairscan.imageprocessing.quad.findQuadFromContourOrientation import org.fairscan.imageprocessing.quad.minAreaRect +import org.fairscan.imageprocessing.quad.scoreQuadAgainstProbmap import org.opencv.core.Core +import org.opencv.core.CvType import org.opencv.core.Mat import org.opencv.core.MatOfPoint import 
org.opencv.core.MatOfPoint2f @@ -31,35 +32,78 @@ interface Mask { fun toMat(): Mat } -fun detectDocumentQuad(mask: Mask, isLiveAnalysis: Boolean, minQuadAreaRatio: Double = 0.02): Quad? { +fun detectDocumentQuad(mask: Mask, originalSize: ImageSize, isLiveAnalysis: Boolean): Quad? { val mat = mask.toMat() - val (biggest: MatOfPoint2f?, area) = biggestContour(mat) - var vertices: List? - if (biggest != null && biggest.total() == 4L && area > mask.width * mask.height * minQuadAreaRatio) { - vertices = biggest.toList()?.map { Point(it.x, it.y) } - } else { - - // Fallback 1: adjust threshold - val thresholds = - if (isLiveAnalysis) listOf(25.0, 50.0, 75.0) else (0..12).map { 0.2 + it * 0.05 } - vertices = detectDocumentQuadFromProbmap(mat, thresholds) - ?.map { Point(it.x, it.y) } - if (vertices == null && biggest != null && biggest.total() > 4) { - - // Fallback 2: look for right angles + // Best thresholds on test dataset: {0.95=146, 0.85=39, 0.75=35, 0.90=8, 0.70=1, 0.35=1} + val thresholds = + if (isLiveAnalysis) listOf(0.9) else listOf(0.5, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95) + var vertices = findQuadFromOrientationWithAdaptiveThreshold(mat, originalSize, thresholds) + ?.map { Point(it.x, it.y) } + if (vertices == null && !isLiveAnalysis) { + // Fallback: bounding rectangle + val biggest = biggestContour(mat) + if (biggest != null) { val polygon = biggest.toList().map { Point(it.x, it.y) } - vertices = findQuadFromRightAngles(polygon, mask.width, mask.height) - if (vertices == null && !isLiveAnalysis) { - - // Fallback 3: bounding rectangle - vertices = minAreaRect(polygon, mask.width, mask.height) - } + vertices = minAreaRect(polygon, mask.width, mask.height) } } return if (vertices?.size == 4) createQuad(vertices) else null } -private fun biggestContour(mat: Mat): Pair { +fun findQuadFromOrientationWithAdaptiveThreshold( + maskMat: Mat, originalSize: ImageSize, thresholds: List +): List? 
{ + val probmapU8 = Mat() + val probmap = maskMat + probmap.convertTo(probmapU8, CvType.CV_8U, 255.0) + val probmapSmooth = Mat() + Imgproc.GaussianBlur(probmapU8, probmapSmooth, Size(3.0, 3.0), 0.0) + + var bestQuad: List? = null + var bestScore = 0.0 + for (thr in thresholds) { + val bin = Mat() + Imgproc.threshold(probmapSmooth, bin, thr * 255.0, 255.0, Imgproc.THRESH_BINARY) + val kernel = Imgproc.getStructuringElement(Imgproc.MORPH_ELLIPSE, Size(5.0, 5.0)) + Imgproc.morphologyEx(bin, bin, Imgproc.MORPH_CLOSE, kernel) + val quad = findQuadFromOrientation(bin, originalSize) + if (quad != null && isValidQuad(quad, originalSize)) { + val probFloat = Mat() + probmap.convertTo(probFloat, CvType.CV_32F) + val score = scoreQuadAgainstProbmap(quad, probFloat, minQuadAreaRatio = 0.02) + if (score > bestScore) { + bestScore = score + bestQuad = quad + } + } + bin.release() + } + + probmapSmooth.release() + probmapU8.release() + return bestQuad +} + +fun isValidQuad(quad: List, originalSize: ImageSize): Boolean { + return quad.all { + it.x >= 0 && it.x <= originalSize.width + && it.y >= 0 && it.y <= originalSize.height + } +} + +fun findQuadFromOrientation(maskMat: Mat, originalSize: ImageSize): List? { + val contour = biggestContour(maskMat) + contour?:return null + + val scaleX = originalSize.width / maskMat.size().width + val scaleY = originalSize.height / maskMat.size().height + + return findQuadFromContourOrientation( + contour.toList().map { org.opencv.core.Point(it.x * scaleX, it.y * scaleY) } + )?.map { org.opencv.core.Point(it.x / scaleX, it.y / scaleY) } +} + +fun biggestContour(mat: Mat): MatOfPoint? 
{ val refinedMask = refineMask(mat) val blurred = Mat() @@ -70,24 +114,19 @@ private fun biggestContour(mat: Mat): Pair { val contours = mutableListOf() val hierarchy = Mat() - Imgproc.findContours(edges, contours, hierarchy, Imgproc.RETR_LIST, Imgproc.CHAIN_APPROX_SIMPLE) + Imgproc.findContours(edges, contours, hierarchy, Imgproc.RETR_LIST, Imgproc.CHAIN_APPROX_NONE) - var biggest: MatOfPoint2f? = null + var biggest: MatOfPoint? = null var maxArea = 0.0 for (contour in contours) { - val contour2f = MatOfPoint2f(*contour.toArray()) - val peri = Imgproc.arcLength(contour2f, true) - val approx = MatOfPoint2f() - Imgproc.approxPolyDP(contour2f, approx, 0.02 * peri, true) - - val area = abs(Imgproc.contourArea(approx)) + val area = abs(Imgproc.contourArea(contour)) if (area > maxArea) { maxArea = area - biggest = approx + biggest = contour } } - return Pair(biggest, maxArea) + return biggest } /** @@ -171,3 +210,6 @@ fun Point.toCv(): org.opencv.core.Point { return org.opencv.core.Point(x, y) } +fun Size.toImageSize(): ImageSize { + return ImageSize(width, height) +} diff --git a/imageprocessing/src/main/java/org/fairscan/imageprocessing/Geometry.kt b/imageprocessing/src/main/java/org/fairscan/imageprocessing/Geometry.kt index 1efa857..e4063d4 100644 --- a/imageprocessing/src/main/java/org/fairscan/imageprocessing/Geometry.kt +++ b/imageprocessing/src/main/java/org/fairscan/imageprocessing/Geometry.kt @@ -103,3 +103,7 @@ fun Quad.scaledTo(fromWidth: Int, fromHeight: Int, toWidth: Int, toHeight: Int): fun Point.scaled(scaleX: Double, scaleY: Double): Point { return Point((x * scaleX), (y * scaleY)) } + +data class ImageSize(val width: Double, val height: Double) { + constructor(width: Int, height: Int) : this (width.toDouble(), height.toDouble()) +} diff --git a/imageprocessing/src/main/java/org/fairscan/imageprocessing/quad/AdaptiveThreshold.kt b/imageprocessing/src/main/java/org/fairscan/imageprocessing/quad/AdaptiveThreshold.kt deleted file mode 100644 index 
af1907e..0000000 --- a/imageprocessing/src/main/java/org/fairscan/imageprocessing/quad/AdaptiveThreshold.kt +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright 2025-2026 Pierre-Yves Nicolas - * - * This program is free software: you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation, either version 3 of the License, or (at your option) - * any later version. - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * You should have received a copy of the GNU General Public License along with - * this program. If not, see . - */ -package org.fairscan.imageprocessing.quad - -import org.opencv.core.Mat -import org.opencv.core.CvType -import org.opencv.core.Size -import org.opencv.core.Point -import org.opencv.core.Scalar -import org.opencv.core.MatOfPoint -import org.opencv.core.MatOfPoint2f -import org.opencv.core.Core -import org.opencv.imgproc.Imgproc -import kotlin.math.abs - -// Look for a threshold for which we find a quad in the mask -fun detectDocumentQuadFromProbmap( - probmap: Mat, - thresholds: List, - useOtsu: Boolean = true, - minQuadAreaRatio: Double = 0.02 -): List? { - val probmapU8 = Mat() - probmap.convertTo(probmapU8, CvType.CV_8U, 255.0) - val probmapSmooth = Mat() - Imgproc.GaussianBlur(probmapU8, probmapSmooth, Size(3.0, 3.0), 0.0) - - var bestScore = 0.0 - var bestQuad: List? 
= null - - // 1) Otsu - if (useOtsu) { - val otsu = Mat() - Imgproc.threshold(probmapSmooth, otsu, 0.0, 255.0, Imgproc.THRESH_BINARY + Imgproc.THRESH_OTSU) - val quad = findQuadFromBinaryMask(otsu, minQuadAreaRatio) - if (quad != null) { - val probFloat = Mat() - probmap.convertTo(probFloat, CvType.CV_32F) - val sc = scoreQuadAgainstProbmap(quad, probFloat) - if (sc > bestScore) { - bestScore = sc - bestQuad = quad - } - } - } - - // 2) Threshold sweep - for (thr in thresholds) { - val bin = Mat() - Imgproc.threshold(probmapSmooth, bin, thr * 255.0, 255.0, Imgproc.THRESH_BINARY) - val kernel = Imgproc.getStructuringElement(Imgproc.MORPH_ELLIPSE, Size(5.0, 5.0)) - Imgproc.morphologyEx(bin, bin, Imgproc.MORPH_CLOSE, kernel) - val quad = findQuadFromBinaryMask(bin, minQuadAreaRatio) - if (quad != null) { - val probFloat = Mat() - probmap.convertTo(probFloat, CvType.CV_32F) - val sc = scoreQuadAgainstProbmap(quad, probFloat) - if (sc > bestScore) { - bestScore = sc - bestQuad = quad - } - } - } - - return bestQuad -} - -// Fill polygon and return binary mask (0/1) -fun makePolygonMask(size: Size, polygon: List): Mat { - val mask = Mat.zeros(size, CvType.CV_8U) - val pts = MatOfPoint(*polygon.toTypedArray()) - Imgproc.fillPoly(mask, listOf(pts), Scalar(1.0)) - return mask -} - -// Compute score between quad and probmap -fun scoreQuadAgainstProbmap(quad: List, probmap: Mat): Double { - val mask = makePolygonMask(probmap.size(), quad) - val maskFloat = Mat() - mask.convertTo(maskFloat, CvType.CV_32F) - val masked = Mat() - Core.multiply(probmap, maskFloat, masked) - val meanProb = Core.sumElems(masked).`val`[0] / Core.sumElems(maskFloat).`val`[0] - val areaRatio = Core.sumElems(maskFloat).`val`[0] / (probmap.rows() * probmap.cols()) - return meanProb * (0.7 + 0.3 * areaRatio) -} - -// Find largest quadrilateral in a binary mask -fun findQuadFromBinaryMask(binMask: Mat, minQuadAreaRatio: Double = 0.02): List? 
{ - val blurred = Mat() - Imgproc.GaussianBlur(binMask, blurred, Size(5.0, 5.0), 0.0) - val edges = Mat() - Imgproc.Canny(blurred, edges, 75.0, 200.0) - - val contours = mutableListOf() - Imgproc.findContours(edges, contours, Mat(), Imgproc.RETR_LIST, Imgproc.CHAIN_APPROX_SIMPLE) - - var biggest: MatOfPoint2f? = null - var maxArea = 0.0 - for (cnt in contours) { - val cnt2f = MatOfPoint2f(*cnt.toArray()) - val peri = Imgproc.arcLength(cnt2f, true) - val approx = MatOfPoint2f() - Imgproc.approxPolyDP(cnt2f, approx, 0.02 * peri, true) - if (approx.rows() == 4) { - val area = abs(Imgproc.contourArea(approx)) - if (area > maxArea) { - maxArea = area - biggest = approx - } - } - } - val totalArea = binMask.rows() * binMask.cols().toDouble() - return if (maxArea > totalArea * minQuadAreaRatio && biggest != null) { - biggest.toList() - } else null -} diff --git a/imageprocessing/src/main/java/org/fairscan/imageprocessing/quad/ContourOrientation.kt b/imageprocessing/src/main/java/org/fairscan/imageprocessing/quad/ContourOrientation.kt new file mode 100644 index 0000000..ee109f5 --- /dev/null +++ b/imageprocessing/src/main/java/org/fairscan/imageprocessing/quad/ContourOrientation.kt @@ -0,0 +1,308 @@ +/* + * Copyright 2025-2026 Pierre-Yves Nicolas + * + * This program is free software: you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
+ */ +package org.fairscan.imageprocessing.quad + +import org.opencv.core.Point +import kotlin.math.abs +import kotlin.math.atan2 +import kotlin.math.cos +import kotlin.math.hypot +import kotlin.math.sin + +/** +Instead of detecting corners (like Douglas-Peucker), this algorithm detects +the four dominant sides of the document by segmenting the contour according +to stable edge orientations, then fits lines and intersects them to +reconstruct the quadrilateral. +*/ +fun findQuadFromContourOrientation( + contour: List, + smoothWindow: Int = 5, + maxAngleVar: Double = Math.toRadians(5.0), + mergeAngle: Double = Math.toRadians(7.0), + minSideLengthRatio: Double = 0.02 +): List? { + + if (contour.size < 20) return null + + val angles = computeSmoothedAngles(contour, smoothWindow) + + val perimeter = contour.zipWithNext { a, b -> hypot(b.x - a.x, b.y - a.y) }.sum() + + val minLength = perimeter * minSideLengthRatio + val segments = extractSegments(contour, angles, maxAngleVar, minLength) + val mergedSegments = mergeSegments(segments, mergeAngle) + val dominantSegments = selectDominantSegments( + mergedSegments, + maxCount = 4, + minAngleSeparation = Math.toRadians(25.0) + ) + + if (dominantSegments.size != 4) return null + + val lines = dominantSegments.map { + val points = if (it.start < it.end) + contour.subList(it.start, it.end) + else + contour.subList(it.start, contour.size) + contour.subList(0, it.end) + fitLine(points) + } + + val corners = mutableListOf() + for (i in 0 until 4) { + val p = intersectLines(lines[i], lines[(i + 1) % 4]) + ?: return null + corners += p + } + return corners +} + +private fun normalizeAngle(a: Double): Double { + var x = a + while (x <= -Math.PI) x += 2 * Math.PI + while (x > Math.PI) x -= 2 * Math.PI + return x +} + +private fun angleDiff(a: Double, b: Double): Double = + abs(normalizeAngle(a - b)) + +private data class Line( + val p: Point, + val d: Point +) + +private fun fitLine(points: List): Line { + val cx = points.map { it.x 
}.average() + val cy = points.map { it.y }.average() + + var xx = 0.0 + var xy = 0.0 + var yy = 0.0 + + for (p in points) { + val dx = p.x - cx + val dy = p.y - cy + xx += dx * dx + xy += dx * dy + yy += dy * dy + } + + val theta = 0.5 * atan2(2 * xy, xx - yy) + val dir = Point(cos(theta), sin(theta)) + + return Line(Point(cx, cy), dir) +} + +private fun intersectLines(l1: Line, l2: Line): Point? { + val x1 = l1.p.x + val y1 = l1.p.y + val x2 = x1 + l1.d.x + val y2 = y1 + l1.d.y + + val x3 = l2.p.x + val y3 = l2.p.y + val x4 = x3 + l2.d.x + val y4 = y3 + l2.d.y + + val denom = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4) + if (abs(denom) < 1e-6) return null + + val px = + ((x1*y2 - y1*x2)*(x3 - x4) - (x1 - x2)*(x3*y4 - y3*x4)) / denom + val py = + ((x1*y2 - y1*x2)*(y3 - y4) - (y1 - y2)*(x3*y4 - y3*x4)) / denom + + return Point(px, py) +} + +private data class ContourSegment( + val start: Int, + val end: Int, + val angle: Double, + val length: Double +) + +private fun extractSegments( + contour: List, + angles: DoubleArray, + maxAngleVar: Double, + minLength: Double +): List { + + val n = contour.size + val result = mutableListOf() + + val startIndex = findBestStartIndex(angles) + + var start = startIndex + var refAngle = angles[startIndex] + + fun segmentLength(s: Int, e: Int): Double { + var len = 0.0 + var i = s + while (i != e) { + val j = (i + 1) % n + len += hypot( + contour[j].x - contour[i].x, + contour[j].y - contour[i].y + ) + i = j + } + return len + } + + var steps = 1 + while (steps <= n) { + val idx = (startIndex + steps) % n + + if (steps < n && angleDiff(angles[idx], refAngle) < maxAngleVar) { + refAngle = angleMean(refAngle, angles[idx]) + } else { + val len = segmentLength(start, idx) + if (len >= minLength) { + result += ContourSegment(start, idx, refAngle, len) + } + start = idx + refAngle = angles[idx] + } + + steps++ + } + + return result +} + +private fun findBestStartIndex(angles: DoubleArray): Int { + val n = angles.size + var bestIndex = 0 
+ var bestDelta = 0.0 + + for (i in 0 until n) { + val j = (i + 1) % n + val d = angleDiff(angles[i], angles[j]) + if (d > bestDelta) { + bestDelta = d + bestIndex = j + } + } + return bestIndex +} + +private fun angleMean(a: Double, b: Double): Double { + val x = cos(a) + cos(b) + val y = sin(a) + sin(b) + return atan2(y, x) +} + +private fun computeSmoothedAngles( + contour: List, + window: Int +): DoubleArray { + val n = contour.size + + // --- Step 1: raw angles --- + val angles = DoubleArray(n) + for (i in 0 until n) { + val p0 = contour[(i - 1 + n) % n] + val p1 = contour[(i + 1) % n] + angles[i] = atan2(p1.y - p0.y, p1.x - p0.x) + } + + // --- Step 2: precompute cos/sin --- + val cosA = DoubleArray(n) + val sinA = DoubleArray(n) + for (i in 0 until n) { + cosA[i] = cos(angles[i]) + sinA[i] = sin(angles[i]) + } + + // --- Step 3: sliding window smoothing --- + val smooth = DoubleArray(n) + + var sx = 0.0 + var sy = 0.0 + + // initial window centered on index 0 + for (k in -window..window) { + val idx = (k + n) % n + sx += cosA[idx] + sy += sinA[idx] + } + + smooth[0] = atan2(sy, sx) + + for (i in 1 until n) { + val outIdx = (i - window - 1 + n) % n + val inIdx = (i + window) % n + sx -= cosA[outIdx] + sy -= sinA[outIdx] + sx += cosA[inIdx] + sy += sinA[inIdx] + smooth[i] = atan2(sy, sx) + } + return smooth +} + +private fun mergeSegments( + segments: List, + angleThreshold: Double +): List { + if (segments.isEmpty()) return emptyList() + if (segments.size <= 4) return segments + + val merged = mutableListOf() + var cur = segments[0] + + for (i in 1 until segments.size) { + val p = segments[i] + if (angleDiff(p.angle, cur.angle) < angleThreshold) { + cur = ContourSegment( + cur.start, + p.end, + angleMean(cur.angle, p.angle), + cur.length + p.length + ) + } else { + merged += cur + cur = p + } + } + merged += cur + return merged +} + +private fun selectDominantSegments( + segments: List, + maxCount: Int, + minAngleSeparation: Double +): List { + + val sorted = 
segments.sortedByDescending { it.length } + val selected = mutableListOf() + + for (p in sorted) { + val tooClose = selected.any { s -> + angleDiff(p.angle, s.angle) < minAngleSeparation + } + + if (!tooClose) { + selected += p + if (selected.size == maxCount) break + } + } + + return selected.sortedBy { it.start } +} diff --git a/imageprocessing/src/main/java/org/fairscan/imageprocessing/quad/QuadScore.kt b/imageprocessing/src/main/java/org/fairscan/imageprocessing/quad/QuadScore.kt new file mode 100644 index 0000000..312f0c0 --- /dev/null +++ b/imageprocessing/src/main/java/org/fairscan/imageprocessing/quad/QuadScore.kt @@ -0,0 +1,44 @@ +/* + * Copyright 2025-2026 Pierre-Yves Nicolas + * + * This program is free software: you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
+ */ +package org.fairscan.imageprocessing.quad + +import org.opencv.core.Core +import org.opencv.core.CvType +import org.opencv.core.Mat +import org.opencv.core.MatOfPoint +import org.opencv.core.Point +import org.opencv.core.Scalar +import org.opencv.core.Size +import org.opencv.imgproc.Imgproc + +// Fill polygon and return binary mask (0/1) +fun makePolygonMask(size: Size, polygon: List): Mat { + val mask = Mat.zeros(size, CvType.CV_8U) + val pts = MatOfPoint(*polygon.toTypedArray()) + Imgproc.fillPoly(mask, listOf(pts), Scalar(1.0)) + return mask +} + +// Compute score between quad and probmap +fun scoreQuadAgainstProbmap(quad: List, probmap: Mat, minQuadAreaRatio: Double): Double { + val mask = makePolygonMask(probmap.size(), quad) + val maskFloat = Mat() + mask.convertTo(maskFloat, CvType.CV_32F) + val masked = Mat() + Core.multiply(probmap, maskFloat, masked) + val meanProb = Core.sumElems(masked).`val`[0] / Core.sumElems(maskFloat).`val`[0] + val areaRatio = Core.sumElems(maskFloat).`val`[0] / (probmap.rows() * probmap.cols()) + return if (areaRatio < minQuadAreaRatio) 0.0 else meanProb * (0.7 + 0.3 * areaRatio) +} diff --git a/imageprocessing/src/main/java/org/fairscan/imageprocessing/quad/RightAngles.kt b/imageprocessing/src/main/java/org/fairscan/imageprocessing/quad/RightAngles.kt deleted file mode 100644 index 749c877..0000000 --- a/imageprocessing/src/main/java/org/fairscan/imageprocessing/quad/RightAngles.kt +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright 2025-2026 Pierre-Yves Nicolas - * - * This program is free software: you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation, either version 3 of the License, or (at your option) - * any later version. - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. 
/**
 * Looks for three consecutive corners of the closed polygon [points] whose oriented angles all
 * fall within [angleMin]..[angleMax] degrees, and completes each such run into a quadrilateral.
 *
 * The fourth corner is the intersection of the edge entering the run with the edge leaving it.
 * A candidate is dropped when that intersection does not exist, when any corner lies outside
 * `[0, imgWidth) x [0, imgHeight)`, or when the candidate is not convex.
 *
 * @return the surviving candidate with the smallest [quadAngleError], or null when [points] has
 *   fewer than 4 entries or no candidate survives
 */
fun findQuadFromRightAngles(
    points: List<Point>,
    imgWidth: Int,
    imgHeight: Int,
    angleMin: Float = 60f,
    angleMax: Float = 120f
): List<Point>? {
    val count = points.size
    if (count < 4) return null

    // Oriented angle at every vertex, taken between its cyclic predecessor and successor.
    val cornerAngles = List(count) { idx ->
        orientedAngle(points[(idx + count - 1) % count], points[idx], points[(idx + 1) % count])
    }

    var winner: List<Point>? = null
    var lowestError = Double.POSITIVE_INFINITY

    for (start in 0 until count) {
        val runIsAcceptable = (0..2).all { offset ->
            cornerAngles[(start + offset) % count] in angleMin..angleMax
        }
        if (!runIsAcceptable) continue

        val before = points[(start + count - 1) % count]
        val first = points[start]
        val second = points[(start + 1) % count]
        val third = points[(start + 2) % count]
        val after = points[(start + 3) % count]

        // Extend the edge arriving at the run and the edge leaving it until they meet.
        val fourth = lineIntersection2(before, first, third, after) ?: continue
        val candidate = listOf(first, second, third, fourth)

        val leavesImage = candidate.any { p ->
            p.x < 0 || p.x >= imgWidth || p.y < 0 || p.y >= imgHeight
        }
        if (leavesImage) continue
        if (!isConvex(candidate)) continue

        val error = quadAngleError(candidate)
        if (error < lowestError) {
            lowestError = error
            winner = candidate
        }
    }
    return winner
}

/**
 * Returns the unsigned angle, in degrees, between the vectors [v1] and [v2].
 *
 * A tiny epsilon is added to each vector length so a zero-length vector does not divide by zero.
 */
fun angleBetween(v1: Point, v2: Point): Float {
    val length1 = sqrt(v1.x * v1.x + v1.y * v1.y) + 1e-9f
    val length2 = sqrt(v2.x * v2.x + v2.y * v2.y) + 1e-9f
    val cosine = ((v1.x * v2.x + v1.y * v2.y) / (length1 * length2)).coerceIn(-1.0, 1.0)
    val radians = acos(cosine)
    return Math.toDegrees(radians).toFloat()
}

/**
 * Returns the angle at vertex [b], in degrees, between the rays towards [a] and towards [c].
 *
 * The unsigned angle is used as-is when the cross product of the two rays is non-negative;
 * otherwise the result is 360 minus the unsigned angle.
 */
fun orientedAngle(a: Point, b: Point, c: Point): Double {
    val toA = Point(a.x - b.x, a.y - b.y)
    val toC = Point(c.x - b.x, c.y - b.y)
    val lengthA = sqrt(toA.x * toA.x + toA.y * toA.y) + 1e-9f
    val lengthC = sqrt(toC.x * toC.x + toC.y * toC.y) + 1e-9f
    val cosine = ((toA.x * toC.x + toA.y * toC.y) / (lengthA * lengthC)).coerceIn(-1.0, 1.0)
    val crossProduct = toA.x * toC.y - toA.y * toC.x
    val unsigned = Math.toDegrees(acos(cosine))
    return if (crossProduct < 0) 360.0 - unsigned else unsigned
}

/**
 * Intersects the infinite line through [p1] and [p2] with the infinite line through [p3] and
 * [p4].
 *
 * @return the intersection point, or null when the determinant is smaller than 1e-6 in absolute
 *   value (the lines are treated as parallel)
 */
fun lineIntersection2(p1: Point, p2: Point, p3: Point, p4: Point): Point? {
    val dx12 = p1.x - p2.x
    val dy12 = p1.y - p2.y
    val dx34 = p3.x - p4.x
    val dy34 = p3.y - p4.y

    val determinant = dx12 * dy34 - dy12 * dx34
    if (abs(determinant) < 1e-6f) return null

    val cross12 = p1.x * p2.y - p1.y * p2.x
    val cross34 = p3.x * p4.y - p3.y * p4.x
    val x = (cross12 * dx34 - dx12 * cross34) / determinant
    val y = (cross12 * dy34 - dy12 * cross34) / determinant
    return Point(x, y)
}

/**
 * Sums, over the four corners of [quad], the absolute deviation of the corner angle from 90
 * degrees. Only the first four entries of [quad] are read.
 */
fun quadAngleError(quad: List<Point>): Double =
    (0 until 4).fold(0.0) { total, i ->
        val previous = quad[(i + 3) % 4]
        val corner = quad[i]
        val next = quad[(i + 1) % 4]
        val angle = angleBetween(
            Point(previous.x - corner.x, previous.y - corner.y),
            Point(next.x - corner.x, next.y - corner.y)
        )
        total + abs(angle - 90.0)
    }

/**
 * Tells whether the four-point polygon [quad] is convex.
 *
 * Walks the corners and compares the sign of the cross product of consecutive edges: every
 * non-zero sign must agree with the first non-zero one. Zero cross products are ignored.
 *
 * @return false when [quad] does not have exactly 4 points or two turns have opposite signs
 */
fun isConvex(quad: List<Point>): Boolean {
    if (quad.size != 4) return false
    var referenceTurn = 0
    for (i in 0 until 4) {
        val p = quad[i]
        val q = quad[(i + 1) % 4]
        val r = quad[(i + 2) % 4]
        val cross = (q.x - p.x) * (r.y - q.y) - (q.y - p.y) * (r.x - q.x)
        val turn = cross.sign.toInt()
        if (turn == 0) continue
        if (referenceTurn == 0) {
            referenceTurn = turn
        } else if (turn != referenceTurn) {
            return false
        }
    }
    return true
}