diff --git a/app/src/main/java/org/fairscan/app/DocumentDetection.kt b/app/src/main/java/org/fairscan/app/DocumentDetection.kt index c475d36..afe9121 100644 --- a/app/src/main/java/org/fairscan/app/DocumentDetection.kt +++ b/app/src/main/java/org/fairscan/app/DocumentDetection.kt @@ -16,8 +16,13 @@ package org.fairscan.app import android.graphics.Bitmap import androidx.core.graphics.createBitmap +import org.fairscan.app.ImageSegmentationService.Segmentation +import org.fairscan.app.quad.detectDocumentQuadFromProbmap +import org.fairscan.app.quad.findQuadFromRightAngles +import org.fairscan.app.quad.minAreaRect import org.opencv.android.Utils import org.opencv.core.Core +import org.opencv.core.CvType import org.opencv.core.Mat import org.opencv.core.MatOfPoint import org.opencv.core.MatOfPoint2f @@ -26,14 +31,39 @@ import org.opencv.imgproc.Imgproc import kotlin.math.abs import kotlin.math.max -fun detectDocumentQuad(mask: Bitmap, minQuadAreaRatio: Double = 0.02): Quad? { - val mat = Mat() - Utils.bitmapToMat(mask, mat) +fun detectDocumentQuad(mask: Segmentation, isLiveAnalysis: Boolean, minQuadAreaRatio: Double = 0.02): Quad? { + val mat = mask.toMat() + val (biggest: MatOfPoint2f?, area) = biggestContour(mat) + var vertices: List? + if (biggest != null && biggest.total() == 4L && area > mask.width * mask.height * minQuadAreaRatio) { + vertices = biggest.toList()?.map { Point(it.x, it.y) } + } else { - val gray = Mat() - Imgproc.cvtColor(mat, gray, Imgproc.COLOR_BGR2GRAY) + // Fallback 1: adjust threshold + val thresholds = + if (isLiveAnalysis) listOf(25.0, 50.0, 75.0) else (0..12).map { 0.2 + it * 0.05 } + vertices = detectDocumentQuadFromProbmap(mat, thresholds) + ?.map { Point(it.x, it.y) } + if (vertices == null && biggest != null && biggest.total() > 4) { - val refinedMask = refineMask(gray) + // Fallback 2: look for right angles + val polygon = biggest.toList().map { Point(it.x, it.y) } + vertices = findQuadFromRightAngles(polygon, mask.width, mask.height) + if (vertices == null && !isLiveAnalysis) { + + // Fallback 3: bounding rectangle + vertices = minAreaRect(polygon, mask.width, mask.height) + } + } + } + return if (vertices?.size == 4) createQuad(vertices) else null +} + +private fun biggestContour(mat: Mat): Pair { + val mat8u = Mat() + mat.convertTo(mat8u, CvType.CV_8UC1, 255.0) + + val refinedMask = refineMask(mat8u) val blurred = Mat() Imgproc.GaussianBlur(refinedMask, blurred, Size(5.0, 5.0), 0.0) @@ -54,21 +84,13 @@ fun detectDocumentQuad(mask: Bitmap, minQuadAreaRatio: Double = 0.02): Quad? { val approx = MatOfPoint2f() Imgproc.approxPolyDP(contour2f, approx, 0.02 * peri, true) - if (approx.total() == 4L) { - val area = abs(Imgproc.contourArea(approx)) - if (area > maxArea) { - maxArea = area - biggest = approx - } + val area = abs(Imgproc.contourArea(approx)) + if (area > maxArea) { + maxArea = area + biggest = approx } } - - if (maxArea < mask.width * mask.height * minQuadAreaRatio) { - return null - } - - val vertices = biggest?.toList()?.map { Point(it.x, it.y) } - return if (vertices?.size == 4) createQuad(vertices) else null + return Pair(biggest, maxArea) } /** @@ -77,7 +99,7 @@ fun detectDocumentQuad(mask: Bitmap, minQuadAreaRatio: Double = 0.02): Quad? { fun refineMask(original: Mat): Mat { // Step 0: Ensure the mask is binary (just in case) val binaryMask = Mat() - Imgproc.threshold(original, binaryMask, 0.0, 255.0, Imgproc.THRESH_BINARY) + Imgproc.threshold(original, binaryMask, 128.0, 255.0, Imgproc.THRESH_BINARY) // Step 1: Closing (fills small holes) val kernelClose = Imgproc.getStructuringElement(Imgproc.MORPH_ELLIPSE, Size(5.0, 5.0)) diff --git a/app/src/main/java/org/fairscan/app/ImageSegmentation.kt b/app/src/main/java/org/fairscan/app/ImageSegmentation.kt index 56a3dc4..a753594 100644 --- a/app/src/main/java/org/fairscan/app/ImageSegmentation.kt +++ b/app/src/main/java/org/fairscan/app/ImageSegmentation.kt @@ -28,6 +28,8 @@ import kotlinx.coroutines.isActive import kotlinx.coroutines.sync.Mutex import kotlinx.coroutines.sync.withLock import kotlinx.coroutines.withContext +import org.opencv.core.CvType +import org.opencv.core.Mat import org.tensorflow.lite.DataType import org.tensorflow.lite.Interpreter import org.tensorflow.lite.support.common.FileUtil @@ -116,29 +118,36 @@ class ImageSegmentationService(private val context: Context) { outputBuffer.rewind() interpreter.run(tensorImage.tensorBuffer.buffer, outputBuffer) outputBuffer.rewind() - val mask = generateMaskFromOutputBuffer(outputBuffer, w, h) - return Segmentation(mask) + return Segmentation(outputToArray(outputBuffer, w, h), w, h) } - private fun generateMaskFromOutputBuffer(outputBuffer: ByteBuffer, width: Int, height: Int): Bitmap { + private fun outputToArray(outputBuffer: ByteBuffer, width: Int, height: Int): FloatArray { outputBuffer.rewind() - val floatArray = FloatArray(width * height) - outputBuffer.asFloatBuffer()[floatArray] - - val pixels = IntArray(width * height) - for (i in floatArray.indices) { - val value = floatArray[i].coerceIn(0f, 1f) - val gray = (value * 255).toInt() - pixels[i] = Color.rgb(gray, gray, gray) + val maskFloats = FloatArray(width * height) + outputBuffer.asFloatBuffer()[maskFloats] + for (i in maskFloats.indices) { + maskFloats[i] = maskFloats[i].coerceIn(0f, 1f) } - - val bitmap = createBitmap(width, height, Bitmap.Config.ARGB_8888) - bitmap.setPixels(pixels, 0, width, 0, 0, width, height) - return bitmap + return maskFloats } - data class Segmentation(val mask: Bitmap) { - fun toBinaryMask(): Bitmap = mask + data class Segmentation(private val probmap: FloatArray, val width: Int, val height: Int) { + fun get(x: Int, y: Int): Float = probmap[y * width + x] + fun toBinaryMask(): Bitmap { + val bmp = createBitmap(width, height, Bitmap.Config.ARGB_8888) + val pixels = IntArray(width * height) + for (i in probmap.indices) { + val v = (probmap[i].coerceIn(0f, 1f) * 255f).toInt() + pixels[i] = Color.rgb(v, v, v) + } + bmp.setPixels(pixels, 0, width, 0, 0, width, height) + return bmp + } + fun toMat(): Mat { + val mat = Mat(height, width, CvType.CV_32FC1) + mat.put(0, 0, probmap) + return mat + } } data class SegmentationResult( diff --git a/app/src/main/java/org/fairscan/app/MainViewModel.kt b/app/src/main/java/org/fairscan/app/MainViewModel.kt index e68b8b9..1e2ae1a 100644 --- a/app/src/main/java/org/fairscan/app/MainViewModel.kt +++ b/app/src/main/java/org/fairscan/app/MainViewModel.kt @@ -105,11 +105,12 @@ class MainViewModel( imageSegmentationService.segmentation .filterNotNull() .map { + // TODO Should we really call toBinaryMask if it's used only in debug mode? val binaryMask = it.segmentation.toBinaryMask() LiveAnalysisState( inferenceTime = it.inferenceTime, binaryMask = binaryMask, - documentQuad = detectDocumentQuad(binaryMask), + documentQuad = detectDocumentQuad(it.segmentation, isLiveAnalysis = true), timestamp = System.currentTimeMillis(), ) } @@ -190,8 +191,8 @@ class MainViewModel( val bitmap = imageProxy.toBitmap() val segmentation = imageSegmentationService.runSegmentationAndReturn(bitmap, 0) if (segmentation != null) { - val mask = segmentation.segmentation.toBinaryMask() - var quad = detectDocumentQuad(mask) + val mask = segmentation.segmentation + var quad = detectDocumentQuad(mask, isLiveAnalysis = false) if (quad == null) { val now = System.currentTimeMillis() lastSuccessfulLiveAnalysisState?.timestamp?.let { diff --git a/app/src/main/java/org/fairscan/app/quad/AdaptiveThreshold.kt b/app/src/main/java/org/fairscan/app/quad/AdaptiveThreshold.kt new file mode 100644 index 0000000..da5ed18 --- /dev/null +++ b/app/src/main/java/org/fairscan/app/quad/AdaptiveThreshold.kt @@ -0,0 +1,129 @@ +/* + * Copyright 2025 Pierre-Yves Nicolas + * + * This program is free software: you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ +package org.fairscan.app.quad + +import org.opencv.core.Mat +import org.opencv.core.CvType +import org.opencv.core.Size +import org.opencv.core.Point +import org.opencv.core.Scalar +import org.opencv.core.MatOfPoint +import org.opencv.core.MatOfPoint2f +import org.opencv.core.Core +import org.opencv.imgproc.Imgproc +import kotlin.math.abs + +// Look for a threshold for which we find a quad in the mask +fun detectDocumentQuadFromProbmap( + probmap: Mat, + thresholds: List, + useOtsu: Boolean = true, + minQuadAreaRatio: Double = 0.02 +): List? { + val probmapU8 = Mat() + probmap.convertTo(probmapU8, CvType.CV_8U, 255.0) + val probmapSmooth = Mat() + Imgproc.GaussianBlur(probmapU8, probmapSmooth, Size(3.0, 3.0), 0.0) + + var bestScore = 0.0 + var bestQuad: List? = null + + // 1) Otsu + if (useOtsu) { + val otsu = Mat() + Imgproc.threshold(probmapSmooth, otsu, 0.0, 255.0, Imgproc.THRESH_BINARY + Imgproc.THRESH_OTSU) + val quad = findQuadFromBinaryMask(otsu, minQuadAreaRatio) + if (quad != null) { + val probFloat = Mat() + probmap.convertTo(probFloat, CvType.CV_32F) + val sc = scoreQuadAgainstProbmap(quad, probFloat) + if (sc > bestScore) { + bestScore = sc + bestQuad = quad + } + } + } + + // 2) Threshold sweep + for (thr in thresholds) { + val bin = Mat() + Imgproc.threshold(probmapSmooth, bin, thr * 255.0, 255.0, Imgproc.THRESH_BINARY) + val kernel = Imgproc.getStructuringElement(Imgproc.MORPH_ELLIPSE, Size(5.0, 5.0)) + Imgproc.morphologyEx(bin, bin, Imgproc.MORPH_CLOSE, kernel) + val quad = findQuadFromBinaryMask(bin, minQuadAreaRatio) + if (quad != null) { + val probFloat = Mat() + probmap.convertTo(probFloat, CvType.CV_32F) + val sc = scoreQuadAgainstProbmap(quad, probFloat) + if (sc > bestScore) { + bestScore = sc + bestQuad = quad + } + } + } + + return bestQuad +} + +// Fill polygon and return binary mask (0/1) +fun makePolygonMask(size: Size, polygon: List): Mat { + val mask = Mat.zeros(size, CvType.CV_8U) + val pts = MatOfPoint(*polygon.toTypedArray()) + Imgproc.fillPoly(mask, listOf(pts), Scalar(1.0)) + return mask +} + +// Compute score between quad and probmap +fun scoreQuadAgainstProbmap(quad: List, probmap: Mat): Double { + val mask = makePolygonMask(probmap.size(), quad) + val maskFloat = Mat() + mask.convertTo(maskFloat, CvType.CV_32F) + val masked = Mat() + Core.multiply(probmap, maskFloat, masked) + val meanProb = Core.sumElems(masked).`val`[0] / Core.sumElems(maskFloat).`val`[0] + val areaRatio = Core.sumElems(maskFloat).`val`[0] / (probmap.rows() * probmap.cols()) + return meanProb * (0.7 + 0.3 * areaRatio) +} + +// Find largest quadrilateral in a binary mask +fun findQuadFromBinaryMask(binMask: Mat, minQuadAreaRatio: Double = 0.02): List? { + val blurred = Mat() + Imgproc.GaussianBlur(binMask, blurred, Size(5.0, 5.0), 0.0) + val edges = Mat() + Imgproc.Canny(blurred, edges, 75.0, 200.0) + + val contours = mutableListOf() + Imgproc.findContours(edges, contours, Mat(), Imgproc.RETR_LIST, Imgproc.CHAIN_APPROX_SIMPLE) + + var biggest: MatOfPoint2f? = null + var maxArea = 0.0 + for (cnt in contours) { + val cnt2f = MatOfPoint2f(*cnt.toArray()) + val peri = Imgproc.arcLength(cnt2f, true) + val approx = MatOfPoint2f() + Imgproc.approxPolyDP(cnt2f, approx, 0.02 * peri, true) + if (approx.rows() == 4) { + val area = abs(Imgproc.contourArea(approx)) + if (area > maxArea) { + maxArea = area + biggest = approx + } + } + } + val totalArea = binMask.rows() * binMask.cols().toDouble() + return if (maxArea > totalArea * minQuadAreaRatio && biggest != null) { + biggest.toList() + } else null +} diff --git a/app/src/main/java/org/fairscan/app/quad/MinAreaRect.kt b/app/src/main/java/org/fairscan/app/quad/MinAreaRect.kt new file mode 100644 index 0000000..fcf824d --- /dev/null +++ b/app/src/main/java/org/fairscan/app/quad/MinAreaRect.kt @@ -0,0 +1,111 @@ +/* + * Copyright 2025 Pierre-Yves Nicolas + * + * This program is free software: you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ +package org.fairscan.app.quad + +import org.fairscan.app.Point +import kotlin.math.cos +import kotlin.math.sin + +// Look for a minimal rectangle that covers a given polygon +fun minAreaRect(polygon: List, imgWidth: Int? = null, imgHeight: Int? = null): List? { + if (polygon.size < 3) return null + + val hull = convexHull(polygon) + if (hull.size < 3) return hull + + var bestArea = Double.POSITIVE_INFINITY + var bestRect: List? = null + + // Test 90 rotation angles between 0 and π/2 + for (deg in 0 until 90) { + val angle = Math.toRadians(deg.toDouble()) + val cosA = cos(angle) + val sinA = sin(angle) + + // Rotation matrix + val rotX = { p: Point -> p.x * cosA - p.y * sinA } + val rotY = { p: Point -> p.x * sinA + p.y * cosA } + + val rotated = hull.map { Point(rotX(it), rotY(it)) } + + val minX = rotated.minOf { it.x } + val maxX = rotated.maxOf { it.x } + val minY = rotated.minOf { it.y } + val maxY = rotated.maxOf { it.y } + + val area = (maxX - minX) * (maxY - minY) + if (area < bestArea) { + bestArea = area + + val rectRot = listOf( + Point(minX, minY), + Point(maxX, minY), + Point(maxX, maxY), + Point(minX, maxY) + ) + + // Apply inverse rotation + val invX = { p: Point -> p.x * cosA + p.y * sinA } + val invY = { p: Point -> -p.x * sinA + p.y * cosA } + val rect = rectRot.map { Point(invX(it), invY(it)) } + + bestRect = rect + } + } + + if (bestRect == null) return null + + // Optionally clip within image bounds + if (imgWidth != null && imgHeight != null) { + val w = imgWidth - 1.0 + val h = imgHeight - 1.0 + return bestRect.map { + Point(it.x.coerceIn(0.0, w), it.y.coerceIn(0.0, h)) + } + } + + return bestRect +} + +fun convexHull(points: List): List { + val unique = points.distinctBy { Pair(it.x, it.y) } + if (unique.size <= 3) return unique + + val sorted = unique.sortedWith(compareBy({ it.x }, { it.y })) + + fun cross(o: Point, a: Point, b: Point): Double { + return (a.x - o.x) * (b.y - o.y) - (a.y - o.y) * (b.x - o.x) + } + + val lower = mutableListOf() + for (p in sorted) { + while (lower.size >= 2 && cross(lower[lower.size - 2], lower.last(), p) <= 0f) { + lower.removeAt(lower.lastIndex) + } + lower.add(p) + } + + val upper = mutableListOf() + for (p in sorted.asReversed()) { + while (upper.size >= 2 && cross(upper[upper.size - 2], upper.last(), p) <= 0f) { + upper.removeAt(upper.lastIndex) + } + upper.add(p) + } + + // Remove last element of each list to avoid duplication + val hull = lower.dropLast(1) + upper.dropLast(1) + return hull +} diff --git a/app/src/main/java/org/fairscan/app/quad/RightAngles.kt b/app/src/main/java/org/fairscan/app/quad/RightAngles.kt new file mode 100644 index 0000000..61666bb --- /dev/null +++ b/app/src/main/java/org/fairscan/app/quad/RightAngles.kt @@ -0,0 +1,133 @@ +/* + * Copyright 2025 Pierre-Yves Nicolas + * + * This program is free software: you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ +package org.fairscan.app.quad + +import org.fairscan.app.Point +import kotlin.math.abs +import kotlin.math.acos +import kotlin.math.sqrt +import kotlin.math.sign + +// Look for 3 consecutive angles that are (almost) right angles +fun findQuadFromRightAngles( + points: List, + imgWidth: Int, + imgHeight: Int, + angleMin: Float = 60f, + angleMax: Float = 120f +): List? { + if (points.size < 4) return null + val n = points.size + + val angles = mutableListOf() + for (i in 0 until n) { + val a = points[(i + n - 1) % n] + val b = points[i] + val c = points[(i + 1) % n] + angles.add(orientedAngle(a, b, c)) + } + + var bestQuad: List? = null + var bestScore = Double.POSITIVE_INFINITY + + for (i in 0 until n) { + val triplet = listOf(angles[i % n], angles[(i + 1) % n], angles[(i + 2) % n]) + if (triplet.all { it in angleMin..angleMax }) { + val a = points[(i + n - 1) % n] + val b = points[i] + val c = points[(i + 1) % n] + val d = points[(i + 2) % n] + val e = points[(i + 3) % n] + + val inter = lineIntersection2(a, b, d, e) ?: continue + + val quad = listOf(b, c, d, inter) + + // ensure inside image bounds + if (quad.any { it.x < 0 || it.x >= imgWidth || it.y < 0 || it.y >= imgHeight }) continue + + // ensure convex + if (!isConvex(quad)) continue + + val score = quadAngleError(quad) + if (score < bestScore) { + bestScore = score + bestQuad = quad + } + } + } + return bestQuad +} + +fun angleBetween(v1: Point, v2: Point): Float { + val norm1 = sqrt(v1.x * v1.x + v1.y * v1.y) + 1e-9f + val norm2 = sqrt(v2.x * v2.x + v2.y * v2.y) + 1e-9f + val dot = (v1.x * v2.x + v1.y * v2.y) / (norm1 * norm2) + val cosAngle = dot.coerceIn(-1.0, 1.0) + return Math.toDegrees(acos(cosAngle).toDouble()).toFloat() +} + +fun orientedAngle(a: Point, b: Point, c: Point): Double { + val v1 = Point(a.x - b.x, a.y - b.y) + val v2 = Point(c.x - b.x, c.y - b.y) + val norm1 = sqrt(v1.x * v1.x + v1.y * v1.y) + 1e-9f + val norm2 = sqrt(v2.x * v2.x + v2.y * v2.y) + 1e-9f + val dot = ((v1.x * v2.x + v1.y * v2.y) / (norm1 * norm2)).coerceIn(-1.0, 1.0) + val cross = v1.x * v2.y - v1.y * v2.x + var angle = Math.toDegrees(acos(dot)) + if (cross < 0) angle = 360.0 - angle + return angle +} + +fun lineIntersection2(p1: Point, p2: Point, p3: Point, p4: Point): Point? { + val denom = (p1.x - p2.x) * (p3.y - p4.y) - (p1.y - p2.y) * (p3.x - p4.x) + if (abs(denom) < 1e-6f) return null + val numX = (p1.x * p2.y - p1.y * p2.x) + val numY = (p3.x * p4.y - p3.y * p4.x) + val px = (numX * (p3.x - p4.x) - (p1.x - p2.x) * numY) / denom + val py = (numX * (p3.y - p4.y) - (p1.y - p2.y) * numY) / denom + return Point(px, py) +} + +fun quadAngleError(quad: List): Double { + var err = 0.0 + for (i in 0 until 4) { + val a = quad[(i + 3) % 4] + val b = quad[i] + val c = quad[(i + 1) % 4] + val ang = angleBetween(Point(a.x - b.x, a.y - b.y), Point(c.x - b.x, c.y - b.y)) + err += abs(ang - 90.0) + } + return err +} + +fun isConvex(quad: List): Boolean { + if (quad.size != 4) return false + var sign = 0 + for (i in quad.indices) { + val a = quad[i] + val b = quad[(i + 1) % 4] + val c = quad[(i + 2) % 4] + val cross = (b.x - a.x) * (c.y - b.y) - (b.y - a.y) * (c.x - b.x) + val currentSign = cross.sign.toInt() + if (sign == 0 && currentSign != 0) { + sign = currentSign + } else if (currentSign != 0 && currentSign != sign) { + return false + } + } + return true +} +