Quad detection: new fallback algorithms

This commit is contained in:
Pierre-Yves Nicolas
2025-11-09 23:35:30 +01:00
committed by pynicolas
parent df3e2dc3e2
commit 5b7d76e9c4
6 changed files with 445 additions and 40 deletions

View File

@@ -16,8 +16,13 @@ package org.fairscan.app
import android.graphics.Bitmap import android.graphics.Bitmap
import androidx.core.graphics.createBitmap import androidx.core.graphics.createBitmap
import org.fairscan.app.ImageSegmentationService.Segmentation
import org.fairscan.app.quad.detectDocumentQuadFromProbmap
import org.fairscan.app.quad.findQuadFromRightAngles
import org.fairscan.app.quad.minAreaRect
import org.opencv.android.Utils import org.opencv.android.Utils
import org.opencv.core.Core import org.opencv.core.Core
import org.opencv.core.CvType
import org.opencv.core.Mat import org.opencv.core.Mat
import org.opencv.core.MatOfPoint import org.opencv.core.MatOfPoint
import org.opencv.core.MatOfPoint2f import org.opencv.core.MatOfPoint2f
@@ -26,14 +31,39 @@ import org.opencv.imgproc.Imgproc
import kotlin.math.abs import kotlin.math.abs
import kotlin.math.max import kotlin.math.max
fun detectDocumentQuad(mask: Bitmap, minQuadAreaRatio: Double = 0.02): Quad? { fun detectDocumentQuad(mask: Segmentation, isLiveAnalysis: Boolean, minQuadAreaRatio: Double = 0.02): Quad? {
val mat = Mat() val mat = mask.toMat()
Utils.bitmapToMat(mask, mat) val (biggest: MatOfPoint2f?, area) = biggestContour(mat)
var vertices: List<Point>?
if (biggest != null && biggest.total() == 4L && area > mask.width * mask.height * minQuadAreaRatio) {
vertices = biggest.toList()?.map { Point(it.x, it.y) }
} else {
val gray = Mat() // Fallback 1: adjust threshold
Imgproc.cvtColor(mat, gray, Imgproc.COLOR_BGR2GRAY) val thresholds =
if (isLiveAnalysis) listOf(25.0, 50.0, 75.0) else (0..12).map { 0.2 + it * 0.05 }
vertices = detectDocumentQuadFromProbmap(mat, thresholds)
?.map { Point(it.x, it.y) }
if (vertices == null && biggest != null && biggest.total() > 4) {
val refinedMask = refineMask(gray) // Fallback 2: look for right angles
val polygon = biggest.toList().map { Point(it.x, it.y) }
vertices = findQuadFromRightAngles(polygon, mask.width, mask.height)
if (vertices == null && !isLiveAnalysis) {
// Fallback 3: bounding rectangle
vertices = minAreaRect(polygon, mask.width, mask.height)
}
}
}
return if (vertices?.size == 4) createQuad(vertices) else null
}
private fun biggestContour(mat: Mat): Pair<MatOfPoint2f?, Double> {
val mat8u = Mat()
mat.convertTo(mat8u, CvType.CV_8UC1, 255.0)
val refinedMask = refineMask(mat8u)
val blurred = Mat() val blurred = Mat()
Imgproc.GaussianBlur(refinedMask, blurred, Size(5.0, 5.0), 0.0) Imgproc.GaussianBlur(refinedMask, blurred, Size(5.0, 5.0), 0.0)
@@ -54,21 +84,13 @@ fun detectDocumentQuad(mask: Bitmap, minQuadAreaRatio: Double = 0.02): Quad? {
val approx = MatOfPoint2f() val approx = MatOfPoint2f()
Imgproc.approxPolyDP(contour2f, approx, 0.02 * peri, true) Imgproc.approxPolyDP(contour2f, approx, 0.02 * peri, true)
if (approx.total() == 4L) { val area = abs(Imgproc.contourArea(approx))
val area = abs(Imgproc.contourArea(approx)) if (area > maxArea) {
if (area > maxArea) { maxArea = area
maxArea = area biggest = approx
biggest = approx
}
} }
} }
return Pair(biggest, maxArea)
if (maxArea < mask.width * mask.height * minQuadAreaRatio) {
return null
}
val vertices = biggest?.toList()?.map { Point(it.x, it.y) }
return if (vertices?.size == 4) createQuad(vertices) else null
} }
/** /**
@@ -77,7 +99,7 @@ fun detectDocumentQuad(mask: Bitmap, minQuadAreaRatio: Double = 0.02): Quad? {
fun refineMask(original: Mat): Mat { fun refineMask(original: Mat): Mat {
// Step 0: Ensure the mask is binary (just in case) // Step 0: Ensure the mask is binary (just in case)
val binaryMask = Mat() val binaryMask = Mat()
Imgproc.threshold(original, binaryMask, 0.0, 255.0, Imgproc.THRESH_BINARY) Imgproc.threshold(original, binaryMask, 128.0, 255.0, Imgproc.THRESH_BINARY)
// Step 1: Closing (fills small holes) // Step 1: Closing (fills small holes)
val kernelClose = Imgproc.getStructuringElement(Imgproc.MORPH_ELLIPSE, Size(5.0, 5.0)) val kernelClose = Imgproc.getStructuringElement(Imgproc.MORPH_ELLIPSE, Size(5.0, 5.0))

View File

@@ -28,6 +28,8 @@ import kotlinx.coroutines.isActive
import kotlinx.coroutines.sync.Mutex import kotlinx.coroutines.sync.Mutex
import kotlinx.coroutines.sync.withLock import kotlinx.coroutines.sync.withLock
import kotlinx.coroutines.withContext import kotlinx.coroutines.withContext
import org.opencv.core.CvType
import org.opencv.core.Mat
import org.tensorflow.lite.DataType import org.tensorflow.lite.DataType
import org.tensorflow.lite.Interpreter import org.tensorflow.lite.Interpreter
import org.tensorflow.lite.support.common.FileUtil import org.tensorflow.lite.support.common.FileUtil
@@ -116,29 +118,36 @@ class ImageSegmentationService(private val context: Context) {
outputBuffer.rewind() outputBuffer.rewind()
interpreter.run(tensorImage.tensorBuffer.buffer, outputBuffer) interpreter.run(tensorImage.tensorBuffer.buffer, outputBuffer)
outputBuffer.rewind() outputBuffer.rewind()
val mask = generateMaskFromOutputBuffer(outputBuffer, w, h) return Segmentation(outputToArray(outputBuffer, w, h), w, h)
return Segmentation(mask)
} }
private fun generateMaskFromOutputBuffer(outputBuffer: ByteBuffer, width: Int, height: Int): Bitmap { private fun outputToArray(outputBuffer: ByteBuffer, width: Int, height: Int): FloatArray {
outputBuffer.rewind() outputBuffer.rewind()
val floatArray = FloatArray(width * height) val maskFloats = FloatArray(width * height)
outputBuffer.asFloatBuffer()[floatArray] outputBuffer.asFloatBuffer()[maskFloats]
for (i in maskFloats.indices) {
val pixels = IntArray(width * height) maskFloats[i] = maskFloats[i].coerceIn(0f, 1f)
for (i in floatArray.indices) {
val value = floatArray[i].coerceIn(0f, 1f)
val gray = (value * 255).toInt()
pixels[i] = Color.rgb(gray, gray, gray)
} }
return maskFloats
val bitmap = createBitmap(width, height, Bitmap.Config.ARGB_8888)
bitmap.setPixels(pixels, 0, width, 0, 0, width, height)
return bitmap
} }
data class Segmentation(val mask: Bitmap) { data class Segmentation(private val probmap: FloatArray, val width: Int, val height: Int) {
fun toBinaryMask(): Bitmap = mask fun get(x: Int, y: Int): Float = probmap[y * width + x]
fun toBinaryMask(): Bitmap {
val bmp = createBitmap(width, height, Bitmap.Config.ARGB_8888)
val pixels = IntArray(width * height)
for (i in probmap.indices) {
val v = (probmap[i].coerceIn(0f, 1f) * 255f).toInt()
pixels[i] = Color.rgb(v, v, v)
}
bmp.setPixels(pixels, 0, width, 0, 0, width, height)
return bmp
}
fun toMat(): Mat {
val mat = Mat(height, width, CvType.CV_32FC1)
mat.put(0, 0, probmap)
return mat
}
} }
data class SegmentationResult( data class SegmentationResult(

View File

@@ -105,11 +105,12 @@ class MainViewModel(
imageSegmentationService.segmentation imageSegmentationService.segmentation
.filterNotNull() .filterNotNull()
.map { .map {
// TODO Should we really call toBinaryMask if it's used only in debug mode?
val binaryMask = it.segmentation.toBinaryMask() val binaryMask = it.segmentation.toBinaryMask()
LiveAnalysisState( LiveAnalysisState(
inferenceTime = it.inferenceTime, inferenceTime = it.inferenceTime,
binaryMask = binaryMask, binaryMask = binaryMask,
documentQuad = detectDocumentQuad(binaryMask), documentQuad = detectDocumentQuad(it.segmentation, isLiveAnalysis = true),
timestamp = System.currentTimeMillis(), timestamp = System.currentTimeMillis(),
) )
} }
@@ -190,8 +191,8 @@ class MainViewModel(
val bitmap = imageProxy.toBitmap() val bitmap = imageProxy.toBitmap()
val segmentation = imageSegmentationService.runSegmentationAndReturn(bitmap, 0) val segmentation = imageSegmentationService.runSegmentationAndReturn(bitmap, 0)
if (segmentation != null) { if (segmentation != null) {
val mask = segmentation.segmentation.toBinaryMask() val mask = segmentation.segmentation
var quad = detectDocumentQuad(mask) var quad = detectDocumentQuad(mask, isLiveAnalysis = false)
if (quad == null) { if (quad == null) {
val now = System.currentTimeMillis() val now = System.currentTimeMillis()
lastSuccessfulLiveAnalysisState?.timestamp?.let { lastSuccessfulLiveAnalysisState?.timestamp?.let {

View File

@@ -0,0 +1,129 @@
/*
* Copyright 2025 Pierre-Yves Nicolas
*
* This program is free software: you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.fairscan.app.quad
import org.opencv.core.Mat
import org.opencv.core.CvType
import org.opencv.core.Size
import org.opencv.core.Point
import org.opencv.core.Scalar
import org.opencv.core.MatOfPoint
import org.opencv.core.MatOfPoint2f
import org.opencv.core.Core
import org.opencv.imgproc.Imgproc
import kotlin.math.abs
/**
 * Searches for a binarization of [probmap] from which a quadrilateral can be extracted.
 *
 * The map is scaled by 255 into an 8-bit image and lightly blurred. Candidate binary masks
 * are then produced with Otsu's method (when [useOtsu] is true) and with each value of
 * [thresholds]. Every mask that yields a quad through [findQuadFromBinaryMask] is scored with
 * [scoreQuadAgainstProbmap]; the quad with the highest strictly positive score wins.
 *
 * @param probmap map to binarize; presumably per-pixel probabilities in [0, 1], since it is
 *   multiplied by 255 before being stored as 8-bit (to be confirmed against the caller).
 * @param thresholds cut-off values, each multiplied by 255 before being applied to the
 *   8-bit map. They are therefore expected on the same scale as [probmap], not as percentages.
 * @param useOtsu whether to also try an automatically chosen (Otsu) threshold.
 * @param minQuadAreaRatio forwarded to [findQuadFromBinaryMask].
 * @return the best-scoring quad, or null when no candidate mask produced one.
 */
fun detectDocumentQuadFromProbmap(
    probmap: Mat,
    thresholds: List<Double>,
    useOtsu: Boolean = true,
    minQuadAreaRatio: Double = 0.02
): List<Point>? {
    val probmapU8 = Mat()
    probmap.convertTo(probmapU8, CvType.CV_8U, 255.0)
    val probmapSmooth = Mat()
    Imgproc.GaussianBlur(probmapU8, probmapSmooth, Size(3.0, 3.0), 0.0)
    probmapU8.release()

    // Float version of the probmap used for scoring; built at most once, on first use.
    var probFloat: Mat? = null
    var bestScore = 0.0
    var bestQuad: List<Point>? = null

    // Extracts a quad from a candidate mask and keeps it if it beats the current best.
    fun consider(binary: Mat) {
        val quad = findQuadFromBinaryMask(binary, minQuadAreaRatio) ?: return
        val reference = probFloat ?: Mat().also {
            probmap.convertTo(it, CvType.CV_32F)
            probFloat = it
        }
        val score = scoreQuadAgainstProbmap(quad, reference)
        if (score > bestScore) {
            bestScore = score
            bestQuad = quad
        }
    }

    try {
        // 1) Otsu
        if (useOtsu) {
            val otsu = Mat()
            Imgproc.threshold(
                probmapSmooth, otsu, 0.0, 255.0,
                Imgproc.THRESH_BINARY + Imgproc.THRESH_OTSU
            )
            consider(otsu)
            otsu.release()
        }

        // 2) Threshold sweep; the closing kernel does not depend on the threshold
        val kernel = Imgproc.getStructuringElement(Imgproc.MORPH_ELLIPSE, Size(5.0, 5.0))
        for (thr in thresholds) {
            val bin = Mat()
            Imgproc.threshold(probmapSmooth, bin, thr * 255.0, 255.0, Imgproc.THRESH_BINARY)
            Imgproc.morphologyEx(bin, bin, Imgproc.MORPH_CLOSE, kernel)
            consider(bin)
            bin.release()
        }
        kernel.release()
    } finally {
        // Free native buffers eagerly instead of waiting for garbage collection
        probmapSmooth.release()
        probFloat?.release()
    }
    return bestQuad
}
/**
 * Rasterizes [polygon] into a new single-channel 8-bit mask of the given [size]:
 * pixels filled by the polygon are set to 1, every other pixel stays 0.
 */
fun makePolygonMask(size: Size, polygon: List<Point>): Mat {
    val outline = MatOfPoint(*polygon.toTypedArray())
    return Mat.zeros(size, CvType.CV_8U).also { canvas ->
        Imgproc.fillPoly(canvas, listOf(outline), Scalar(1.0))
    }
}
/**
 * Scores how well [quad] matches [probmap].
 *
 * The score is `meanProb * (0.7 + 0.3 * areaRatio)`, where `meanProb` is the mean of
 * [probmap] over the pixels filled by the quad and `areaRatio` is the fraction of the
 * map covered by the quad.
 *
 * @param quad polygon vertices, in [probmap] pixel coordinates.
 * @param probmap map to score against; it is multiplied element-wise with a CV_32F mask,
 *   so it is presumably a single-channel CV_32F matrix (to be confirmed against callers).
 * @return the score, or 0.0 when the quad covers no pixel (instead of a NaN from 0/0).
 */
fun scoreQuadAgainstProbmap(quad: List<Point>, probmap: Mat): Double {
    val mask = makePolygonMask(probmap.size(), quad)
    val maskFloat = Mat()
    val masked = Mat()
    try {
        mask.convertTo(maskFloat, CvType.CV_32F)
        // The mask holds 0/1 values, so its sum is the number of covered pixels
        val coveredPixels = Core.sumElems(maskFloat).`val`[0]
        if (coveredPixels <= 0.0) return 0.0

        Core.multiply(probmap, maskFloat, masked)
        val meanProb = Core.sumElems(masked).`val`[0] / coveredPixels
        val areaRatio = coveredPixels / (probmap.rows() * probmap.cols())
        return meanProb * (0.7 + 0.3 * areaRatio)
    } finally {
        // Free native buffers eagerly instead of waiting for garbage collection
        mask.release()
        maskFloat.release()
        masked.release()
    }
}
/**
 * Finds the largest four-vertex contour in [binMask].
 *
 * The mask is blurred, edges are extracted with Canny, and every contour is simplified
 * with `approxPolyDP` (tolerance: 2% of its perimeter). Among the simplified contours
 * that have exactly four vertices, the one with the largest area is selected.
 *
 * @param minQuadAreaRatio minimum quad area, as a fraction of the mask area.
 * @return the four vertices, or null when there is no four-vertex contour with a
 *   non-zero area above the required minimum.
 */
fun findQuadFromBinaryMask(binMask: Mat, minQuadAreaRatio: Double = 0.02): List<Point>? {
    val smoothed = Mat().also { Imgproc.GaussianBlur(binMask, it, Size(5.0, 5.0), 0.0) }
    val edgeMap = Mat().also { Imgproc.Canny(smoothed, it, 75.0, 200.0) }

    val found = mutableListOf<MatOfPoint>()
    Imgproc.findContours(edgeMap, found, Mat(), Imgproc.RETR_LIST, Imgproc.CHAIN_APPROX_SIMPLE)

    val largest = found
        .map { contour ->
            val curve = MatOfPoint2f(*contour.toArray())
            val perimeter = Imgproc.arcLength(curve, true)
            MatOfPoint2f().also { Imgproc.approxPolyDP(curve, it, 0.02 * perimeter, true) }
        }
        .filter { it.rows() == 4 }
        .map { it to abs(Imgproc.contourArea(it)) }
        // Zero-area shapes are never candidates
        .filter { (_, area) -> area > 0.0 }
        // On equal areas the first contour encountered is kept
        .maxByOrNull { (_, area) -> area }
        ?: return null

    val (quad, quadArea) = largest
    val imageArea = binMask.rows() * binMask.cols().toDouble()
    return if (quadArea > imageArea * minQuadAreaRatio) quad.toList() else null
}

View File

@@ -0,0 +1,111 @@
/*
* Copyright 2025 Pierre-Yves Nicolas
*
* This program is free software: you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.fairscan.app.quad
import org.fairscan.app.Point
import kotlin.math.cos
import kotlin.math.sin
/**
 * Approximates the minimum-area rectangle that covers [polygon].
 *
 * The convex hull of the polygon is rotated by every whole degree in 0..89; for each
 * rotation the axis-aligned bounding box is computed, and the smallest one is rotated
 * back into the original frame.
 *
 * @param polygon points to cover.
 * @param imgWidth when given together with [imgHeight], x coordinates of the result are
 *   clamped to `0..imgWidth - 1`.
 * @param imgHeight when given together with [imgWidth], y coordinates of the result are
 *   clamped to `0..imgHeight - 1`.
 * @return the four corners of the rectangle, or null when the input has fewer than three
 *   points or its hull has fewer than three vertices (no enclosed area).
 */
fun minAreaRect(polygon: List<Point>, imgWidth: Int? = null, imgHeight: Int? = null): List<Point>? {
    if (polygon.size < 3) return null

    val hull = convexHull(polygon)
    // A hull of one or two points is not a rectangle: report "not found" rather than
    // handing back a list that does not have four corners.
    if (hull.size < 3) return null

    var bestArea = Double.POSITIVE_INFINITY
    var bestRect: List<Point>? = null

    // Test 90 rotation angles between 0 and π/2
    for (deg in 0 until 90) {
        val angle = Math.toRadians(deg.toDouble())
        val cosA = cos(angle)
        val sinA = sin(angle)

        // Rotate the hull by `angle`
        val rotated = hull.map { Point(it.x * cosA - it.y * sinA, it.x * sinA + it.y * cosA) }

        val minX = rotated.minOf { it.x }
        val maxX = rotated.maxOf { it.x }
        val minY = rotated.minOf { it.y }
        val maxY = rotated.maxOf { it.y }

        val area = (maxX - minX) * (maxY - minY)
        if (area < bestArea) {
            bestArea = area
            val rectRot = listOf(
                Point(minX, minY),
                Point(maxX, minY),
                Point(maxX, maxY),
                Point(minX, maxY)
            )
            // Apply the inverse rotation to express the box in the original frame
            bestRect = rectRot.map { Point(it.x * cosA + it.y * sinA, -it.x * sinA + it.y * cosA) }
        }
    }

    val rect = bestRect ?: return null

    // Clipping is optional: it only happens when both dimensions are provided
    if (imgWidth == null || imgHeight == null) return rect
    val w = imgWidth - 1.0
    val h = imgHeight - 1.0
    return rect.map { Point(it.x.coerceIn(0.0, w), it.y.coerceIn(0.0, h)) }
}
/**
 * Computes the convex hull of [points].
 *
 * Duplicate coordinates are dropped first; three or fewer distinct points are returned
 * unchanged. Otherwise the points are sorted by x, then y, and the hull is assembled from
 * two chains (one over the sorted points, one over the reversed order), discarding any
 * point that makes a non-positive turn, so collinear points are not kept.
 */
fun convexHull(points: List<Point>): List<Point> {
    val distinctPoints = points.distinctBy { it.x to it.y }
    if (distinctPoints.size <= 3) return distinctPoints

    val ordered = distinctPoints.sortedWith(compareBy({ it.x }, { it.y }))

    // z-component of the cross product (a - o) x (b - o)
    fun turn(o: Point, a: Point, b: Point): Double =
        (a.x - o.x) * (b.y - o.y) - (a.y - o.y) * (b.x - o.x)

    // Builds one chain of the hull; its last point is dropped because it is the
    // first point of the other chain.
    fun halfHull(path: List<Point>): List<Point> {
        val chain = ArrayList<Point>()
        for (candidate in path) {
            while (chain.size >= 2 &&
                turn(chain[chain.size - 2], chain[chain.size - 1], candidate) <= 0.0
            ) {
                chain.removeAt(chain.size - 1)
            }
            chain.add(candidate)
        }
        return chain.dropLast(1)
    }

    return halfHull(ordered) + halfHull(ordered.asReversed())
}

View File

@@ -0,0 +1,133 @@
/*
* Copyright 2025 Pierre-Yves Nicolas
*
* This program is free software: you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.fairscan.app.quad
import org.fairscan.app.Point
import kotlin.math.abs
import kotlin.math.acos
import kotlin.math.sqrt
import kotlin.math.sign
/**
 * Builds a quadrilateral from three consecutive polygon corners that are roughly right angles.
 *
 * For every run of three consecutive vertices whose oriented angles all lie in
 * [angleMin]..[angleMax] (degrees), the fourth corner is obtained by intersecting the
 * edge entering the first vertex with the edge leaving the third one. Candidates that
 * leave the image or are not convex are rejected; the candidate with the smallest
 * [quadAngleError] is returned.
 *
 * @param points polygon vertices, in order.
 * @param imgWidth candidates with any x outside `[0, imgWidth)` are rejected.
 * @param imgHeight candidates with any y outside `[0, imgHeight)` are rejected.
 * @return the best candidate, or null when the polygon has fewer than four vertices or
 *   no candidate passes the checks.
 */
fun findQuadFromRightAngles(
    points: List<Point>,
    imgWidth: Int,
    imgHeight: Int,
    angleMin: Float = 60f,
    angleMax: Float = 120f
): List<Point>? {
    val count = points.size
    if (count < 4) return null

    // Cyclic access; `k` may be -1
    fun vertex(k: Int): Point = points[(k + count) % count]

    val cornerAngles = List(count) { k -> orientedAngle(vertex(k - 1), vertex(k), vertex(k + 1)) }
    val accepted = angleMin..angleMax

    var winner: List<Point>? = null
    var lowestError = Double.POSITIVE_INFINITY

    for (start in 0 until count) {
        val threeNearRight = (0..2).all { offset -> cornerAngles[(start + offset) % count] in accepted }
        if (!threeNearRight) continue

        val first = vertex(start)
        val second = vertex(start + 1)
        val third = vertex(start + 2)
        // Missing corner: where the edge before `first` meets the edge after `third`
        val fourth = lineIntersection2(vertex(start - 1), first, third, vertex(start + 3)) ?: continue
        val candidate = listOf(first, second, third, fourth)

        // ensure inside image bounds
        val leavesImage = candidate.any { it.x < 0 || it.x >= imgWidth || it.y < 0 || it.y >= imgHeight }
        if (leavesImage) continue

        // ensure convex
        if (!isConvex(candidate)) continue

        val error = quadAngleError(candidate)
        if (error < lowestError) {
            lowestError = error
            winner = candidate
        }
    }
    return winner
}
/**
 * Returns the unsigned angle between vectors [v1] and [v2], in degrees (0..180).
 *
 * A tiny epsilon is added to each norm so that a zero-length vector does not cause a
 * division by zero, and the cosine is clamped to [-1, 1] before calling `acos`.
 */
fun angleBetween(v1: Point, v2: Point): Float {
    val length1 = sqrt(v1.x * v1.x + v1.y * v1.y) + 1e-9f
    val length2 = sqrt(v2.x * v2.x + v2.y * v2.y) + 1e-9f
    val cosine = ((v1.x * v2.x + v1.y * v2.y) / (length1 * length2)).coerceIn(-1.0, 1.0)
    return Math.toDegrees(acos(cosine)).toFloat()
}
/**
 * Returns the oriented angle at vertex [b] between the rays b→a and b→c, in degrees.
 *
 * The unsigned angle (0..180) is returned as is when the cross product of the two rays
 * is non-negative, and as `360 - angle` when it is negative.
 */
fun orientedAngle(a: Point, b: Point, c: Point): Double {
    val toAx = a.x - b.x
    val toAy = a.y - b.y
    val toCx = c.x - b.x
    val toCy = c.y - b.y

    // Epsilon keeps the division defined when two vertices coincide
    val lengthA = sqrt(toAx * toAx + toAy * toAy) + 1e-9f
    val lengthC = sqrt(toCx * toCx + toCy * toCy) + 1e-9f
    val cosine = ((toAx * toCx + toAy * toCy) / (lengthA * lengthC)).coerceIn(-1.0, 1.0)

    val unsigned = Math.toDegrees(acos(cosine))
    val crossZ = toAx * toCy - toAy * toCx
    return if (crossZ < 0) 360.0 - unsigned else unsigned
}
/**
 * Intersects the infinite line through [p1] and [p2] with the one through [p3] and [p4].
 *
 * @return the intersection point, or null when the determinant is (almost) zero,
 *   i.e. the lines are parallel or one of them is degenerate.
 */
fun lineIntersection2(p1: Point, p2: Point, p3: Point, p4: Point): Point? {
    val dx12 = p1.x - p2.x
    val dy12 = p1.y - p2.y
    val dx34 = p3.x - p4.x
    val dy34 = p3.y - p4.y

    val determinant = dx12 * dy34 - dy12 * dx34
    if (abs(determinant) < 1e-6f) return null

    val cross12 = p1.x * p2.y - p1.y * p2.x
    val cross34 = p3.x * p4.y - p3.y * p4.x
    return Point(
        (cross12 * dx34 - dx12 * cross34) / determinant,
        (cross12 * dy34 - dy12 * cross34) / determinant
    )
}
/**
 * Sums, over the first four vertices of [quad], the absolute deviation (in degrees) of
 * the interior angle from 90°. A perfect rectangle scores 0; [quad] must have at least
 * four points.
 */
fun quadAngleError(quad: List<Point>): Double =
    (0 until 4).fold(0.0) { total, i ->
        val previous = quad[(i + 3) % 4]
        val corner = quad[i]
        val next = quad[(i + 1) % 4]
        val cornerAngle = angleBetween(
            Point(previous.x - corner.x, previous.y - corner.y),
            Point(next.x - corner.x, next.y - corner.y)
        )
        total + abs(cornerAngle - 90.0)
    }
/**
 * Tells whether the four points of [quad] form a convex polygon: all non-zero turns
 * between consecutive edges must have the same sign. Collinear triples (zero turn) are
 * ignored. Returns false when [quad] does not have exactly four points.
 */
fun isConvex(quad: List<Point>): Boolean {
    if (quad.size != 4) return false

    var orientation = 0
    for (i in 0 until 4) {
        val p = quad[i]
        val q = quad[(i + 1) % 4]
        val r = quad[(i + 2) % 4]
        val turn = ((q.x - p.x) * (r.y - q.y) - (q.y - p.y) * (r.x - q.x)).sign.toInt()
        when {
            turn == 0 -> Unit // collinear: no information
            orientation == 0 -> orientation = turn // first real turn fixes the orientation
            turn != orientation -> return false
        }
    }
    return true
}