New quad algorithm: identify edges from contour orientation (#130)

Goal: improve precision of automatic document cropping by switching:
- from Douglas-Peucker algorithm (OpenCV's approxPolyDP) + a heuristic for documents missing a corner
- to an algorithm that looks for edges

* New quad algorithm: identify edges from contour orientation
* Performance optimization: reduce number of calls to trigonometric functions
* Performance: use a single threshold for live analysis
* Fix orientation of debug mask and compute it only if required
* Exclude quads that go out of the frame
This commit is contained in:
Pierre-Yves Nicolas
2026-03-07 12:09:41 +01:00
committed by GitHub
parent cf196576fe
commit 343495dafe
14 changed files with 488 additions and 316 deletions

View File

@@ -29,6 +29,7 @@ import kotlinx.coroutines.sync.Mutex
import kotlinx.coroutines.sync.withLock
import kotlinx.coroutines.withContext
import org.fairscan.app.data.Logger
import org.fairscan.imageprocessing.ImageSize
import org.fairscan.imageprocessing.Mask
import org.opencv.core.CvType
import org.opencv.core.Mat
@@ -39,7 +40,6 @@ import org.tensorflow.lite.support.common.ops.NormalizeOp
import org.tensorflow.lite.support.image.ImageProcessor
import org.tensorflow.lite.support.image.TensorImage
import org.tensorflow.lite.support.image.ops.ResizeOp
import org.tensorflow.lite.support.image.ops.Rot90Op
import java.nio.ByteBuffer
import java.nio.ByteOrder
@@ -73,13 +73,11 @@ class ImageSegmentationService(private val context: Context, private val logger:
private fun runSegmentation(interpreter: Interpreter, bitmap: Bitmap, rotationDegrees: Int): SegmentationResult {
val startTime = SystemClock.uptimeMillis()
val rotation = -rotationDegrees / 90
val (_, h, w, _) = interpreter.getOutputTensor(0).shape()
val imageProcessor =
ImageProcessor
.Builder()
.add(ResizeOp(h, w, ResizeOp.ResizeMethod.BILINEAR))
.add(Rot90Op(rotation))
.add(NormalizeOp(127.5f, 127.5f)) // TODO check if it's correct
.build()
val tensorImage = TensorImage(DataType.FLOAT32)
@@ -88,7 +86,11 @@ class ImageSegmentationService(private val context: Context, private val logger:
val segmentResult = segment(interpreter, processedImage)
val inferenceTime = SystemClock.uptimeMillis() - startTime
return SegmentationResult(segmentResult, inferenceTime)
return SegmentationResult(
segmentResult,
ImageSize(bitmap.width, bitmap.height),
rotationDegrees,
inferenceTime)
}
suspend fun runSegmentationAndReturn(bitmap: Bitmap, rotationDegrees: Int): SegmentationResult? {
@@ -163,10 +165,14 @@ class ImageSegmentationService(private val context: Context, private val logger:
mask.put(0, 0, data)
return mask
}
/** Returns an [ImageSize] built from this object's `width` and `height`. */
fun maskSize(): ImageSize = ImageSize(width, height)
}
/**
 * Result of one segmentation run.
 *
 * @property segmentation the segmentation output.
 * @property originalSize size of the bitmap that was given to the segmentation.
 * @property rotationDegrees rotation value that was passed along with the bitmap, in degrees.
 * @property inferenceTime elapsed time of the run, measured with `SystemClock.uptimeMillis()`.
 */
data class SegmentationResult(
val segmentation: Segmentation,
val originalSize: ImageSize,
val rotationDegrees: Int,
val inferenceTime: Long
)
}

View File

@@ -211,7 +211,7 @@ fun bindCameraUseCases(
@Composable
fun AnalysisOverlay(liveAnalysisState: LiveAnalysisState, debugMode: Boolean) {
val binaryMask = liveAnalysisState.binaryMask ?: return
val maskSize = liveAnalysisState.maskSize ?: return
val targetQuad = liveAnalysisState.stableQuad
var displayedQuad by remember { mutableStateOf<Quad?>(null) }
val quadColor = MaterialTheme.colorScheme.primary
@@ -233,14 +233,15 @@ fun AnalysisOverlay(liveAnalysisState: LiveAnalysisState, debugMode: Boolean) {
Canvas(modifier = Modifier.fillMaxSize()) {
if (debugMode) {
drawMask(this, binaryMask)
val binaryMask = liveAnalysisState.binaryMaskProvider.invoke()
binaryMask?.let { drawMask(this, it) }
}
displayedQuad?.let { quad ->
val scaledQuad = quad.scaledTo(
fromWidth = binaryMask.width,
fromHeight = binaryMask.height,
toWidth = size.width.toInt(),
toHeight = size.height.toInt()
fromWidth = maskSize.width,
fromHeight = maskSize.height,
toWidth = size.width.toDouble(),
toHeight = size.height.toDouble()
)
scaledQuad.edges().forEach {
drawLine(quadColor, it.from.toOffset(), it.to.toOffset(), 10.0f)

View File

@@ -16,12 +16,14 @@ package org.fairscan.app.ui.screens.camera
import android.graphics.Bitmap
import androidx.compose.runtime.Immutable
import org.fairscan.imageprocessing.ImageSize
import org.fairscan.imageprocessing.Quad
@Immutable
data class LiveAnalysisState(
val inferenceTime: Long = 0L,
val binaryMask: Bitmap? = null,
val maskSize: ImageSize? = null,
val binaryMaskProvider: () -> Bitmap? = { -> null },
val documentQuad: Quad? = null,
val stableQuad: Quad? = null,
)

View File

@@ -15,6 +15,7 @@
package org.fairscan.app.ui.screens.camera
import android.graphics.Bitmap
import android.graphics.Matrix
import androidx.camera.core.ImageProxy
import androidx.core.graphics.createBitmap
import androidx.lifecycle.ViewModel
@@ -72,16 +73,29 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
imageSegmentationService.segmentation
.filterNotNull()
.collect { result ->
// TODO Should we really call toBinaryMask if it's used only in debug mode?
val binaryMask = result.segmentation.toBinaryMask()
val binaryMaskProvider = { ->
var binaryMask: Bitmap = result.segmentation.toBinaryMask()
if (result.rotationDegrees != 0) {
binaryMask = rotateBitmap(binaryMask, result.rotationDegrees.toFloat())
}
binaryMask
}
val rawQuad = detectDocumentQuad(
result.segmentation,
result.originalSize,
isLiveAnalysis = true
)?.rotate90(
result.rotationDegrees / 90,
result.segmentation.width,
result.segmentation.height
)
val stableQuad = quadStabilizer.update(rawQuad)
_liveAnalysisState.value = LiveAnalysisState(
inferenceTime = result.inferenceTime,
binaryMask = binaryMask,
binaryMaskProvider = binaryMaskProvider,
maskSize = result.segmentation.maskSize(),
documentQuad = rawQuad,
stableQuad = stableQuad,
)
@@ -145,13 +159,13 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
private suspend fun processCapturedImage(
source: Bitmap,
rotationDegrees: Int
rotationDegrees: Int,
): CapturedPage? = withContext(Dispatchers.IO) {
var result: CapturedPage? = null
val segmentation = imageSegmentationService.runSegmentationAndReturn(source, 0)
if (segmentation != null) {
val mask = segmentation.segmentation
val quad = detectDocumentQuad(mask, isLiveAnalysis = false)
val quad = detectDocumentQuad(mask, segmentation.originalSize, isLiveAnalysis = false)
if (quad != null) {
val resizedQuad = quad.scaledTo(mask.width, mask.height, source.width, source.height)
result = extractDocumentFromBitmap(source, resizedQuad, rotationDegrees, mask)
@@ -230,3 +244,9 @@ fun toBitmap(bgr: Mat): Bitmap {
rgba.release()
return bmp
}
/**
 * Builds a bitmap from the whole area of [source] transformed by a rotation of [angle] degrees.
 *
 * The last argument of `Bitmap.createBitmap` (`filter`) is `true`.
 */
fun rotateBitmap(source: Bitmap, angle: Float): Bitmap {
    val rotation = Matrix().apply { postRotate(angle) }
    return Bitmap.createBitmap(source, 0, 0, source.width, source.height, rotation, true)
}

View File

@@ -18,6 +18,7 @@ import org.fairscan.imageprocessing.detectDocumentQuad
import org.fairscan.imageprocessing.extractDocument
import org.fairscan.imageprocessing.isColoredDocument
import org.fairscan.imageprocessing.scaledTo
import org.fairscan.imageprocessing.toImageSize
import org.opencv.imgcodecs.Imgcodecs
import java.io.File
@@ -57,7 +58,7 @@ object ColorDetectionEvaluator {
val mask = MatMask(maskMat)
val quad = detectDocumentQuad(mask, isLiveAnalysis = false)
val quad = detectDocumentQuad(mask, mat.size().toImageSize(), isLiveAnalysis = false)
?.scaledTo(mask.width, mask.height, mat.width(), mat.height())
if (quad == null) continue

View File

@@ -19,6 +19,7 @@ import org.fairscan.imageprocessing.detectDocumentQuad
import org.fairscan.imageprocessing.extractDocument
import org.fairscan.imageprocessing.isColoredDocument
import org.fairscan.imageprocessing.scaledTo
import org.fairscan.imageprocessing.toImageSize
import org.opencv.core.Mat
import org.opencv.imgcodecs.Imgcodecs
import java.io.File
@@ -68,7 +69,8 @@ object DatasetEvaluator {
val mask = MatMask(maskMat)
val quad = detectDocumentQuad(mask, isLiveAnalysis = false)
val originalSize = inputMat.size().toImageSize()
val quad = detectDocumentQuad(mask, originalSize, isLiveAnalysis = false)
?.scaledTo(mask.width, mask.height, inputMat.width(), inputMat.height())
val corrected: Mat? = if (quad != null) {

View File

@@ -18,6 +18,7 @@ import org.fairscan.imageprocessing.detectDocumentQuad
import org.fairscan.imageprocessing.extractDocument
import org.fairscan.imageprocessing.isColoredDocument
import org.fairscan.imageprocessing.scaledTo
import org.fairscan.imageprocessing.toImageSize
import org.opencv.core.MatOfInt
import org.opencv.imgcodecs.Imgcodecs
import java.io.File
@@ -56,7 +57,8 @@ object ExportQualityEvaluator {
val mask = MatMask(maskMat)
val quad = detectDocumentQuad(mask, isLiveAnalysis = false)
val originalSize = sourceMat.size().toImageSize()
val quad = detectDocumentQuad(mask, originalSize, isLiveAnalysis = false)
?.scaledTo(mask.width, mask.height, sourceMat.width(), sourceMat.height())
if (quad == null) {
println("Failed to detect quad for $imgName")

View File

@@ -18,6 +18,7 @@ import nu.pattern.OpenCV
import org.fairscan.imageprocessing.detectDocumentQuad
import org.fairscan.imageprocessing.scaledTo
import org.fairscan.imageprocessing.toCv
import org.fairscan.imageprocessing.toImageSize
import org.opencv.core.Core
import org.opencv.core.Mat
import org.opencv.core.Scalar
@@ -63,7 +64,8 @@ object QuadDetectionEvaluator {
val mask = MatMask(maskMat)
val quad = detectDocumentQuad(mask, isLiveAnalysis = false)
val originalSize = inputMat.size().toImageSize()
val quad = detectDocumentQuad(mask, originalSize, isLiveAnalysis = false)
?.scaledTo(mask.width, mask.height, inputMat.width(), inputMat.height())
val inputOut = File(outputDir, "${e.name}_input.jpg")

View File

@@ -14,10 +14,11 @@
*/
package org.fairscan.imageprocessing
import org.fairscan.imageprocessing.quad.detectDocumentQuadFromProbmap
import org.fairscan.imageprocessing.quad.findQuadFromRightAngles
import org.fairscan.imageprocessing.quad.findQuadFromContourOrientation
import org.fairscan.imageprocessing.quad.minAreaRect
import org.fairscan.imageprocessing.quad.scoreQuadAgainstProbmap
import org.opencv.core.Core
import org.opencv.core.CvType
import org.opencv.core.Mat
import org.opencv.core.MatOfPoint
import org.opencv.core.MatOfPoint2f
@@ -31,35 +32,78 @@ interface Mask {
fun toMat(): Mat
}
fun detectDocumentQuad(mask: Mask, isLiveAnalysis: Boolean, minQuadAreaRatio: Double = 0.02): Quad? {
fun detectDocumentQuad(mask: Mask, originalSize: ImageSize, isLiveAnalysis: Boolean): Quad? {
val mat = mask.toMat()
val (biggest: MatOfPoint2f?, area) = biggestContour(mat)
var vertices: List<Point>?
if (biggest != null && biggest.total() == 4L && area > mask.width * mask.height * minQuadAreaRatio) {
vertices = biggest.toList()?.map { Point(it.x, it.y) }
} else {
// Fallback 1: adjust threshold
val thresholds =
if (isLiveAnalysis) listOf(25.0, 50.0, 75.0) else (0..12).map { 0.2 + it * 0.05 }
vertices = detectDocumentQuadFromProbmap(mat, thresholds)
?.map { Point(it.x, it.y) }
if (vertices == null && biggest != null && biggest.total() > 4) {
// Fallback 2: look for right angles
// Best thresholds on test dataset: {0.95=146, 0.85=39, 0.75=35, 0.90=8, 0.70=1, 0.35=1}
val thresholds =
if (isLiveAnalysis) listOf(0.9) else listOf(0.5, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95)
var vertices = findQuadFromOrientationWithAdaptiveThreshold(mat, originalSize, thresholds)
?.map { Point(it.x, it.y) }
if (vertices == null && !isLiveAnalysis) {
// Fallback: bounding rectangle
val biggest = biggestContour(mat)
if (biggest != null) {
val polygon = biggest.toList().map { Point(it.x, it.y) }
vertices = findQuadFromRightAngles(polygon, mask.width, mask.height)
if (vertices == null && !isLiveAnalysis) {
// Fallback 3: bounding rectangle
vertices = minAreaRect(polygon, mask.width, mask.height)
}
vertices = minAreaRect(polygon, mask.width, mask.height)
}
}
return if (vertices?.size == 4) createQuad(vertices) else null
}
private fun biggestContour(mat: Mat): Pair<MatOfPoint2f?, Double> {
/**
 * Tries several binarization thresholds on a probability map and returns the best-scoring quad.
 *
 * For each threshold, the smoothed 8-bit map is binarized at `thr * 255`, closed with a 5x5
 * elliptical kernel and passed to [findQuadFromOrientation]. A candidate is kept only if all of its
 * corners lie inside the mask; it is then scored with [scoreQuadAgainstProbmap]
 * (`minQuadAreaRatio = 0.02`) and the highest strictly positive score wins.
 *
 * @param maskMat probability map; it is multiplied by 255 when converted to 8 bits, so values are
 *   assumed to be in `[0, 1]` (TODO confirm against the model output).
 * @param originalSize size of the image the mask was computed from, forwarded to
 *   [findQuadFromOrientation].
 * @param thresholds thresholds to try, expressed on the same scale as [maskMat].
 * @return the corners of the best quad in mask coordinates, or `null` if no threshold gives a
 *   valid quad with a positive score.
 */
fun findQuadFromOrientationWithAdaptiveThreshold(
    maskMat: Mat, originalSize: ImageSize, thresholds: List<Double>
): List<org.opencv.core.Point>? {
    val probmapU8 = Mat()
    val probmapSmooth = Mat()
    // The float copy and the structuring element do not depend on the threshold:
    // build them once and release them when done.
    val probFloat = Mat()
    val kernel = Imgproc.getStructuringElement(Imgproc.MORPH_ELLIPSE, Size(5.0, 5.0))
    // findQuadFromOrientation maps its result back to mask coordinates, so the
    // "inside the frame" check must use the mask dimensions.
    val maskBounds = ImageSize(maskMat.cols(), maskMat.rows())

    var bestQuad: List<org.opencv.core.Point>? = null
    var bestScore = 0.0
    try {
        maskMat.convertTo(probmapU8, CvType.CV_8U, 255.0)
        Imgproc.GaussianBlur(probmapU8, probmapSmooth, Size(3.0, 3.0), 0.0)
        maskMat.convertTo(probFloat, CvType.CV_32F)

        for (thr in thresholds) {
            val bin = Mat()
            try {
                Imgproc.threshold(probmapSmooth, bin, thr * 255.0, 255.0, Imgproc.THRESH_BINARY)
                Imgproc.morphologyEx(bin, bin, Imgproc.MORPH_CLOSE, kernel)

                val quad = findQuadFromOrientation(bin, originalSize)
                if (quad != null && isValidQuad(quad, maskBounds)) {
                    val score = scoreQuadAgainstProbmap(quad, probFloat, minQuadAreaRatio = 0.02)
                    if (score > bestScore) {
                        bestScore = score
                        bestQuad = quad
                    }
                }
            } finally {
                bin.release()
            }
        }
    } finally {
        kernel.release()
        probFloat.release()
        probmapSmooth.release()
        probmapU8.release()
    }
    return bestQuad
}
/**
 * Tells whether every point of [quad] satisfies `0 <= x <= originalSize.width` and
 * `0 <= y <= originalSize.height`.
 */
fun isValidQuad(quad: List<org.opencv.core.Point>, originalSize: ImageSize): Boolean {
    val allowedX = 0.0..originalSize.width
    val allowedY = 0.0..originalSize.height
    return quad.all { corner -> corner.x in allowedX && corner.y in allowedY }
}
/**
 * Finds a quad from the biggest contour of [maskMat].
 *
 * Contour points are first scaled from mask coordinates to [originalSize] coordinates, the quad is
 * searched with [findQuadFromContourOrientation], and the result is scaled back to mask coordinates.
 *
 * @return the quad corners in mask coordinates, or `null` when there is no contour or no quad.
 */
fun findQuadFromOrientation(maskMat: Mat, originalSize: ImageSize): List<org.opencv.core.Point>? {
    val contour = biggestContour(maskMat) ?: return null

    val maskSize = maskMat.size()
    val scaleX = originalSize.width / maskSize.width
    val scaleY = originalSize.height / maskSize.height

    val contourInOriginal = contour.toList().map { org.opencv.core.Point(it.x * scaleX, it.y * scaleY) }
    val quadInOriginal = findQuadFromContourOrientation(contourInOriginal) ?: return null
    return quadInOriginal.map { org.opencv.core.Point(it.x / scaleX, it.y / scaleY) }
}
fun biggestContour(mat: Mat): MatOfPoint? {
val refinedMask = refineMask(mat)
val blurred = Mat()
@@ -70,24 +114,19 @@ private fun biggestContour(mat: Mat): Pair<MatOfPoint2f?, Double> {
val contours = mutableListOf<MatOfPoint>()
val hierarchy = Mat()
Imgproc.findContours(edges, contours, hierarchy, Imgproc.RETR_LIST, Imgproc.CHAIN_APPROX_SIMPLE)
Imgproc.findContours(edges, contours, hierarchy, Imgproc.RETR_LIST, Imgproc.CHAIN_APPROX_NONE)
var biggest: MatOfPoint2f? = null
var biggest: MatOfPoint? = null
var maxArea = 0.0
for (contour in contours) {
val contour2f = MatOfPoint2f(*contour.toArray())
val peri = Imgproc.arcLength(contour2f, true)
val approx = MatOfPoint2f()
Imgproc.approxPolyDP(contour2f, approx, 0.02 * peri, true)
val area = abs(Imgproc.contourArea(approx))
val area = abs(Imgproc.contourArea(contour))
if (area > maxArea) {
maxArea = area
biggest = approx
biggest = contour
}
}
return Pair(biggest, maxArea)
return biggest
}
/**
@@ -171,3 +210,6 @@ fun Point.toCv(): org.opencv.core.Point {
return org.opencv.core.Point(x, y)
}
/** Converts this OpenCV [Size] to an [ImageSize] with the same width and height. */
fun Size.toImageSize(): ImageSize = ImageSize(width, height)

View File

@@ -103,3 +103,7 @@ fun Quad.scaledTo(fromWidth: Int, fromHeight: Int, toWidth: Int, toHeight: Int):
/** Returns a new [Point] with `x` multiplied by [scaleX] and `y` multiplied by [scaleY]. */
fun Point.scaled(scaleX: Double, scaleY: Double): Point =
    Point(x * scaleX, y * scaleY)
/**
 * Width and height of an image, stored as doubles.
 *
 * @property width image width.
 * @property height image height.
 */
data class ImageSize(val width: Double, val height: Double) {
/** Convenience constructor that converts integer dimensions to doubles. */
constructor(width: Int, height: Int) : this (width.toDouble(), height.toDouble())
}

View File

@@ -1,129 +0,0 @@
/*
* Copyright 2025-2026 Pierre-Yves Nicolas
*
* This program is free software: you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.fairscan.imageprocessing.quad
import org.opencv.core.Mat
import org.opencv.core.CvType
import org.opencv.core.Size
import org.opencv.core.Point
import org.opencv.core.Scalar
import org.opencv.core.MatOfPoint
import org.opencv.core.MatOfPoint2f
import org.opencv.core.Core
import org.opencv.imgproc.Imgproc
import kotlin.math.abs
/**
 * Looks for a threshold for which a quad is found in the mask and returns the best-scoring quad.
 *
 * The probability map is converted to 8 bits (scaled by 255) and blurred. Candidate binary masks
 * are then produced by Otsu thresholding (when [useOtsu] is true) and by each value of
 * [thresholds] (binarized at `thr * 255`, then closed with a 5x5 elliptical kernel). Each quad
 * found by [findQuadFromBinaryMask] is scored with [scoreQuadAgainstProbmap]; the highest strictly
 * positive score wins.
 *
 * @param probmap probability map; assumed to hold values in `[0, 1]` given the 255 scale factor
 *   (TODO confirm).
 * @param thresholds thresholds to sweep, on the same scale as [probmap].
 * @param useOtsu whether to also try an Otsu threshold.
 * @param minQuadAreaRatio minimum quad area, as a fraction of the mask area.
 * @return the corners of the best quad, or `null` if none was found.
 */
fun detectDocumentQuadFromProbmap(
    probmap: Mat,
    thresholds: List<Double>,
    useOtsu: Boolean = true,
    minQuadAreaRatio: Double = 0.02
): List<Point>? {
    val probmapU8 = Mat()
    val probmapSmooth = Mat()
    // Shared by every candidate: converted once and released at the end.
    val probFloat = Mat()
    val kernel = Imgproc.getStructuringElement(Imgproc.MORPH_ELLIPSE, Size(5.0, 5.0))

    var bestScore = 0.0
    var bestQuad: List<Point>? = null

    // Scores the quad found in [binary] (if any) and keeps it when it beats the current best.
    fun consider(binary: Mat) {
        val quad = findQuadFromBinaryMask(binary, minQuadAreaRatio) ?: return
        val score = scoreQuadAgainstProbmap(quad, probFloat)
        if (score > bestScore) {
            bestScore = score
            bestQuad = quad
        }
    }

    try {
        probmap.convertTo(probmapU8, CvType.CV_8U, 255.0)
        Imgproc.GaussianBlur(probmapU8, probmapSmooth, Size(3.0, 3.0), 0.0)
        probmap.convertTo(probFloat, CvType.CV_32F)

        // 1) Otsu
        if (useOtsu) {
            val otsu = Mat()
            try {
                Imgproc.threshold(
                    probmapSmooth, otsu, 0.0, 255.0, Imgproc.THRESH_BINARY + Imgproc.THRESH_OTSU
                )
                consider(otsu)
            } finally {
                otsu.release()
            }
        }

        // 2) Threshold sweep
        for (thr in thresholds) {
            val bin = Mat()
            try {
                Imgproc.threshold(probmapSmooth, bin, thr * 255.0, 255.0, Imgproc.THRESH_BINARY)
                Imgproc.morphologyEx(bin, bin, Imgproc.MORPH_CLOSE, kernel)
                consider(bin)
            } finally {
                bin.release()
            }
        }
    } finally {
        kernel.release()
        probFloat.release()
        probmapSmooth.release()
        probmapU8.release()
    }
    return bestQuad
}
/**
 * Fills [polygon] into a new single-channel 8-bit matrix of the given [size].
 *
 * Pixels inside the polygon are set to 1 and all others to 0. The caller owns the returned [Mat].
 */
fun makePolygonMask(size: Size, polygon: List<Point>): Mat {
    val mask = Mat.zeros(size, CvType.CV_8U)
    val pts = MatOfPoint(*polygon.toTypedArray())
    try {
        Imgproc.fillPoly(mask, listOf(pts), Scalar(1.0))
    } finally {
        // The point matrix is only needed while filling.
        pts.release()
    }
    return mask
}
/**
 * Computes a score between [quad] and [probmap].
 *
 * The score is `meanProb * (0.7 + 0.3 * areaRatio)`, where `meanProb` is the mean of [probmap]
 * inside the quad and `areaRatio` is the quad area divided by the probmap area.
 *
 * @param quad polygon corners, in probmap coordinates.
 * @param probmap probability map; it is multiplied element-wise with a `CV_32F` mask, so it is
 *   expected to be `CV_32F` as well.
 * @return the score, or `0.0` when the polygon covers no pixel.
 */
fun scoreQuadAgainstProbmap(quad: List<Point>, probmap: Mat): Double {
    val mask = makePolygonMask(probmap.size(), quad)
    val maskFloat = Mat()
    val masked = Mat()
    try {
        mask.convertTo(maskFloat, CvType.CV_32F)
        Core.multiply(probmap, maskFloat, masked)
        // The mask holds 0/1 values, so its sum is the polygon area in pixels.
        val quadArea = Core.sumElems(maskFloat).`val`[0]
        // Avoids a 0/0 division (NaN score) for a polygon that covers no pixel.
        if (quadArea <= 0.0) return 0.0
        val meanProb = Core.sumElems(masked).`val`[0] / quadArea
        val areaRatio = quadArea / (probmap.rows().toDouble() * probmap.cols())
        return meanProb * (0.7 + 0.3 * areaRatio)
    } finally {
        masked.release()
        maskFloat.release()
        mask.release()
    }
}
/**
 * Finds the largest quadrilateral in a binary mask.
 *
 * The mask is blurred, edges are extracted with Canny, and every contour is simplified with
 * `approxPolyDP` (epsilon = 2% of its perimeter). Among the approximations that have exactly four
 * vertices, the one with the largest area is kept.
 *
 * @param binMask binary mask to analyse.
 * @param minQuadAreaRatio minimum area of the quad, as a fraction of the mask area.
 * @return the four corners of the quad, or `null` if none is large enough.
 */
fun findQuadFromBinaryMask(binMask: Mat, minQuadAreaRatio: Double = 0.02): List<Point>? {
    val blurred = Mat()
    val edges = Mat()
    val hierarchy = Mat()
    val contours = mutableListOf<MatOfPoint>()
    try {
        Imgproc.GaussianBlur(binMask, blurred, Size(5.0, 5.0), 0.0)
        Imgproc.Canny(blurred, edges, 75.0, 200.0)
        Imgproc.findContours(edges, contours, hierarchy, Imgproc.RETR_LIST, Imgproc.CHAIN_APPROX_SIMPLE)

        var bestQuad: List<Point>? = null
        var maxArea = 0.0
        for (cnt in contours) {
            val cnt2f = MatOfPoint2f(*cnt.toArray())
            val approx = MatOfPoint2f()
            try {
                val peri = Imgproc.arcLength(cnt2f, true)
                Imgproc.approxPolyDP(cnt2f, approx, 0.02 * peri, true)
                if (approx.rows() == 4) {
                    val area = abs(Imgproc.contourArea(approx))
                    if (area > maxArea) {
                        maxArea = area
                        // Copy the corners out so the native matrix can be released right away.
                        bestQuad = approx.toList()
                    }
                }
            } finally {
                approx.release()
                cnt2f.release()
            }
        }

        val totalArea = binMask.rows() * binMask.cols().toDouble()
        return if (maxArea > totalArea * minQuadAreaRatio) bestQuad else null
    } finally {
        contours.forEach { it.release() }
        hierarchy.release()
        edges.release()
        blurred.release()
    }
}

View File

@@ -0,0 +1,308 @@
/*
* Copyright 2025-2026 Pierre-Yves Nicolas
*
* This program is free software: you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.fairscan.imageprocessing.quad
import org.opencv.core.Point
import kotlin.math.abs
import kotlin.math.atan2
import kotlin.math.cos
import kotlin.math.hypot
import kotlin.math.sin
/**
 * Reconstructs a quadrilateral from the four dominant sides of a contour.
 *
 * Instead of detecting corners (like Douglas-Peucker), this algorithm segments the contour
 * according to stable edge orientations, keeps the four dominant segments, fits a line on each
 * of them and intersects consecutive lines to obtain the corners.
 *
 * @param contour contour points, in order.
 * @param smoothWindow half-width of the window used to smooth orientations.
 * @param maxAngleVar maximum deviation (radians) from a segment's reference angle.
 * @param mergeAngle maximum angle difference (radians) for merging consecutive segments.
 * @param minSideLengthRatio minimum segment length, as a fraction of the contour length.
 * @return the four corners, or `null` if the contour has fewer than 20 points, if four dominant
 *   segments cannot be found, or if two consecutive lines do not intersect.
 */
fun findQuadFromContourOrientation(
    contour: List<Point>,
    smoothWindow: Int = 5,
    maxAngleVar: Double = Math.toRadians(5.0),
    mergeAngle: Double = Math.toRadians(7.0),
    minSideLengthRatio: Double = 0.02
): List<Point>? {
    if (contour.size < 20) return null

    val smoothedAngles = computeSmoothedAngles(contour, smoothWindow)

    // Length of the polyline through consecutive points (closing edge not included).
    var contourLength = 0.0
    for (i in 1 until contour.size) {
        val from = contour[i - 1]
        val to = contour[i]
        contourLength += hypot(to.x - from.x, to.y - from.y)
    }
    val minSideLength = contourLength * minSideLengthRatio

    val rawSegments = extractSegments(contour, smoothedAngles, maxAngleVar, minSideLength)
    val sides = selectDominantSegments(
        mergeSegments(rawSegments, mergeAngle),
        maxCount = 4,
        minAngleSeparation = Math.toRadians(25.0)
    )
    if (sides.size != 4) return null

    val sideLines = sides.map { side ->
        val sidePoints =
            if (side.start < side.end) {
                contour.subList(side.start, side.end)
            } else {
                // The segment wraps around the end of the contour list.
                contour.subList(side.start, contour.size) + contour.subList(0, side.end)
            }
        fitLine(sidePoints)
    }

    // Each corner is the intersection of a side with the next one (cyclically).
    return sideLines.indices.map { i ->
        intersectLines(sideLines[i], sideLines[(i + 1) % 4]) ?: return null
    }
}
/** Wraps angle [a] (radians) into the interval `(-PI, PI]` by adding or removing full turns. */
private fun normalizeAngle(a: Double): Double {
    val fullTurn = 2 * Math.PI
    var wrapped = a
    while (wrapped <= -Math.PI) {
        wrapped += fullTurn
    }
    while (wrapped > Math.PI) {
        wrapped -= fullTurn
    }
    return wrapped
}
/** Absolute difference between angles [a] and [b] (radians), after wrapping `a - b`. */
private fun angleDiff(a: Double, b: Double): Double {
    val wrappedDelta = normalizeAngle(a - b)
    return abs(wrappedDelta)
}
/**
 * A 2D line given by a point and a direction.
 *
 * @property p a point located on the line.
 * @property d direction vector of the line.
 */
private data class Line(
val p: Point,
val d: Point
)
/**
 * Fits a line through [points].
 *
 * The line passes through the centroid of the points; its direction angle is
 * `0.5 * atan2(2 * Sxy, Sxx - Syy)`, where `Sxx`, `Sxy` and `Syy` are the sums of products of the
 * centered coordinates.
 */
private fun fitLine(points: List<Point>): Line {
    val centroidX = points.sumOf { it.x } / points.size
    val centroidY = points.sumOf { it.y } / points.size

    val sumXX = points.sumOf { (it.x - centroidX) * (it.x - centroidX) }
    val sumXY = points.sumOf { (it.x - centroidX) * (it.y - centroidY) }
    val sumYY = points.sumOf { (it.y - centroidY) * (it.y - centroidY) }

    val theta = 0.5 * atan2(2 * sumXY, sumXX - sumYY)
    return Line(p = Point(centroidX, centroidY), d = Point(cos(theta), sin(theta)))
}
/**
 * Intersects two lines, each taken as passing through `p` and `p + d`.
 *
 * @return the intersection point, or `null` when the determinant is smaller than `1e-6` in
 *   absolute value (lines parallel or nearly so).
 */
private fun intersectLines(l1: Line, l2: Line): Point? {
    // Two points on each line.
    val x1 = l1.p.x
    val y1 = l1.p.y
    val x2 = x1 + l1.d.x
    val y2 = y1 + l1.d.y
    val x3 = l2.p.x
    val y3 = l2.p.y
    val x4 = x3 + l2.d.x
    val y4 = y3 + l2.d.y

    val denom = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4)
    if (abs(denom) < 1e-6) return null

    val cross12 = x1 * y2 - y1 * x2
    val cross34 = x3 * y4 - y3 * x4
    val px = (cross12 * (x3 - x4) - (x1 - x2) * cross34) / denom
    val py = (cross12 * (y3 - y4) - (y1 - y2) * cross34) / denom
    return Point(px, py)
}
/**
 * A run of consecutive contour points that share a similar orientation.
 *
 * @property start index of the first contour point of the segment.
 * @property end index just after the last contour point; when `start >= end` the segment wraps
 *   around the end of the contour list.
 * @property angle representative orientation of the segment, in radians.
 * @property length length of the segment, summed along the contour.
 */
private data class ContourSegment(
val start: Int,
val end: Int,
val angle: Double,
val length: Double
)
/**
 * Splits a closed contour into segments of stable orientation.
 *
 * The contour is walked once, cyclically, starting at the index returned by
 * [findBestStartIndex]. A segment grows while the angle at the current point stays within
 * [maxAngleVar] of the segment's reference angle; otherwise the segment is closed and a new one
 * starts at the current point. Closed segments shorter than [minLength] are dropped.
 *
 * @param contour contour points, in order.
 * @param angles orientation at each contour point, in radians (same indexing as [contour]).
 * @param maxAngleVar maximum deviation from the reference angle, in radians.
 * @param minLength minimum length of a segment to be kept.
 * @return the kept segments, in the order they were encountered.
 */
private fun extractSegments(
contour: List<Point>,
angles: DoubleArray,
maxAngleVar: Double,
minLength: Double
): List<ContourSegment> {
val n = contour.size
val result = mutableListOf<ContourSegment>()
// Start right after the largest orientation jump (presumably a corner), so that
// the walk does not begin in the middle of a side.
val startIndex = findBestStartIndex(angles)
var start = startIndex
var refAngle = angles[startIndex]
// Length of the contour path from index s to index e, walking forward cyclically.
fun segmentLength(s: Int, e: Int): Double {
var len = 0.0
var i = s
while (i != e) {
val j = (i + 1) % n
len += hypot(
contour[j].x - contour[i].x,
contour[j].y - contour[i].y
)
i = j
}
return len
}
var steps = 1
while (steps <= n) {
val idx = (startIndex + steps) % n
// On the last step (steps == n) the condition is false, which closes the final segment.
if (steps < n && angleDiff(angles[idx], refAngle) < maxAngleVar) {
// Still on the same side: update the reference angle with the new sample.
refAngle = angleMean(refAngle, angles[idx])
} else {
// Orientation changed: close the segment [start, idx) and keep it if long enough.
val len = segmentLength(start, idx)
if (len >= minLength) {
result += ContourSegment(start, idx, refAngle, len)
}
start = idx
refAngle = angles[idx]
}
steps++
}
return result
}
/**
 * Returns the index that follows the largest orientation jump between cyclically consecutive
 * entries of [angles], or 0 when no jump is strictly positive.
 */
private fun findBestStartIndex(angles: DoubleArray): Int {
    var largestJump = 0.0
    var indexAfterJump = 0
    for (current in angles.indices) {
        val next = if (current == angles.lastIndex) 0 else current + 1
        val jump = angleDiff(angles[current], angles[next])
        if (jump > largestJump) {
            largestJump = jump
            indexAfterJump = next
        }
    }
    return indexAfterJump
}
/**
 * Mean direction of angles [a] and [b] (radians): the angle of the sum of their unit vectors.
 */
private fun angleMean(a: Double, b: Double): Double =
    atan2(sin(a) + sin(b), cos(a) + cos(b))
/**
 * Computes a smoothed orientation (radians) for every point of a closed contour.
 *
 * The raw orientation at index `i` is the direction from point `i - 1` to point `i + 1`
 * (cyclically). Smoothing averages unit vectors over the `2 * window + 1` points centered on `i`,
 * using running sums so that each cosine and sine is computed only once.
 *
 * @param contour contour points, in order.
 * @param window half-width of the smoothing window.
 * @return one smoothed angle per contour point.
 */
private fun computeSmoothedAngles(
    contour: List<Point>,
    window: Int
): DoubleArray {
    val n = contour.size

    // Unit vector of the raw orientation at each point.
    val cosines = DoubleArray(n)
    val sines = DoubleArray(n)
    for (i in 0 until n) {
        val before = contour[(i - 1 + n) % n]
        val after = contour[(i + 1) % n]
        val rawAngle = atan2(after.y - before.y, after.x - before.x)
        cosines[i] = cos(rawAngle)
        sines[i] = sin(rawAngle)
    }

    val smoothed = DoubleArray(n)
    var sumCos = 0.0
    var sumSin = 0.0

    // Running sums for the window centered on index 0.
    for (offset in -window..window) {
        val wrapped = (offset + n) % n
        sumCos += cosines[wrapped]
        sumSin += sines[wrapped]
    }
    smoothed[0] = atan2(sumSin, sumCos)

    // Slide the window: drop the sample that leaves, add the one that enters.
    for (center in 1 until n) {
        val leaving = (center - window - 1 + n) % n
        val entering = (center + window) % n
        sumCos -= cosines[leaving]
        sumSin -= sines[leaving]
        sumCos += cosines[entering]
        sumSin += sines[entering]
        smoothed[center] = atan2(sumSin, sumCos)
    }
    return smoothed
}
/**
 * Merges consecutive segments whose angles differ by less than [angleThreshold] (radians).
 *
 * Lists of four segments or fewer are returned unchanged. A merged segment spans from the start of
 * the first segment to the end of the last one; its angle is the mean direction of the two merged
 * angles and its length is the sum of their lengths.
 */
private fun mergeSegments(
    segments: List<ContourSegment>,
    angleThreshold: Double
): List<ContourSegment> {
    if (segments.isEmpty()) return emptyList()
    if (segments.size <= 4) return segments

    val merged = mutableListOf<ContourSegment>()
    var pending = segments.first()
    for (next in segments.drop(1)) {
        pending = if (angleDiff(next.angle, pending.angle) < angleThreshold) {
            ContourSegment(
                start = pending.start,
                end = next.end,
                angle = angleMean(pending.angle, next.angle),
                length = pending.length + next.length
            )
        } else {
            merged += pending
            next
        }
    }
    merged += pending
    return merged
}
/**
 * Picks the longest segments whose angles are pairwise separated by at least
 * [minAngleSeparation] (radians).
 *
 * Candidates are examined from longest to shortest, and selection stops once [maxCount] segments
 * have been chosen.
 *
 * @return the chosen segments, sorted by their start index.
 */
private fun selectDominantSegments(
    segments: List<ContourSegment>,
    maxCount: Int,
    minAngleSeparation: Double
): List<ContourSegment> {
    val chosen = mutableListOf<ContourSegment>()
    for (candidate in segments.sortedByDescending { it.length }) {
        val conflicts = chosen.any { angleDiff(candidate.angle, it.angle) < minAngleSeparation }
        if (conflicts) continue
        chosen += candidate
        if (chosen.size == maxCount) break
    }
    return chosen.sortedBy { it.start }
}

View File

@@ -0,0 +1,44 @@
/*
* Copyright 2025-2026 Pierre-Yves Nicolas
*
* This program is free software: you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.fairscan.imageprocessing.quad
import org.opencv.core.Core
import org.opencv.core.CvType
import org.opencv.core.Mat
import org.opencv.core.MatOfPoint
import org.opencv.core.Point
import org.opencv.core.Scalar
import org.opencv.core.Size
import org.opencv.imgproc.Imgproc
/**
 * Fills [polygon] into a new single-channel 8-bit matrix of the given [size].
 *
 * Pixels inside the polygon are set to 1 and all others to 0. The caller owns the returned [Mat].
 */
fun makePolygonMask(size: Size, polygon: List<Point>): Mat {
    val mask = Mat.zeros(size, CvType.CV_8U)
    val pts = MatOfPoint(*polygon.toTypedArray())
    try {
        Imgproc.fillPoly(mask, listOf(pts), Scalar(1.0))
    } finally {
        // The point matrix is only needed while filling.
        pts.release()
    }
    return mask
}
/**
 * Computes a score between [quad] and [probmap].
 *
 * The score is `meanProb * (0.7 + 0.3 * areaRatio)`, where `meanProb` is the mean of [probmap]
 * inside the quad and `areaRatio` is the quad area divided by the probmap area.
 *
 * @param quad polygon corners, in probmap coordinates.
 * @param probmap probability map; it is multiplied element-wise with a `CV_32F` mask, so it is
 *   expected to be `CV_32F` as well.
 * @param minQuadAreaRatio minimum accepted `areaRatio`.
 * @return the score, or `0.0` when the quad covers no pixel or when `areaRatio` is below
 *   [minQuadAreaRatio].
 */
fun scoreQuadAgainstProbmap(quad: List<Point>, probmap: Mat, minQuadAreaRatio: Double): Double {
    val mask = makePolygonMask(probmap.size(), quad)
    val maskFloat = Mat()
    val masked = Mat()
    try {
        mask.convertTo(maskFloat, CvType.CV_32F)
        Core.multiply(probmap, maskFloat, masked)
        // The mask holds 0/1 values, so its sum is the polygon area in pixels.
        val quadArea = Core.sumElems(maskFloat).`val`[0]
        // Avoids a 0/0 division (NaN score) for a polygon that covers no pixel.
        if (quadArea <= 0.0) return 0.0
        val areaRatio = quadArea / (probmap.rows().toDouble() * probmap.cols())
        if (areaRatio < minQuadAreaRatio) return 0.0
        val meanProb = Core.sumElems(masked).`val`[0] / quadArea
        return meanProb * (0.7 + 0.3 * areaRatio)
    } finally {
        masked.release()
        maskFloat.release()
        mask.release()
    }
}

View File

@@ -1,133 +0,0 @@
/*
* Copyright 2025-2026 Pierre-Yves Nicolas
*
* This program is free software: you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.fairscan.imageprocessing.quad
import org.fairscan.imageprocessing.Point
import kotlin.math.abs
import kotlin.math.acos
import kotlin.math.sqrt
import kotlin.math.sign
/**
 * Looks for 3 consecutive angles that are (almost) right angles and completes them into a quad.
 *
 * For every polygon vertex `b` followed by `c` and `d`, if the oriented angles at `b`, `c` and `d`
 * are all within `[angleMin, angleMax]` degrees, the fourth corner is computed as the intersection
 * of the line through the vertex before `b` and `b` with the line through `d` and the vertex
 * after `d`. Candidates that leave the image or are not convex are discarded; the candidate with
 * the smallest [quadAngleError] is returned.
 *
 * @param points polygon vertices, in order.
 * @param imgWidth image width; valid x coordinates are in `[0, imgWidth)`.
 * @param imgHeight image height; valid y coordinates are in `[0, imgHeight)`.
 * @param angleMin lower bound of accepted angles, in degrees.
 * @param angleMax upper bound of accepted angles, in degrees.
 * @return the best quad, or `null` if there are fewer than 4 points or no candidate is valid.
 */
fun findQuadFromRightAngles(
points: List<Point>,
imgWidth: Int,
imgHeight: Int,
angleMin: Float = 60f,
angleMax: Float = 120f
): List<Point>? {
if (points.size < 4) return null
val n = points.size
// Oriented angle at each vertex, formed with its previous and next neighbours (cyclic).
val angles = mutableListOf<Double>()
for (i in 0 until n) {
val a = points[(i + n - 1) % n]
val b = points[i]
val c = points[(i + 1) % n]
angles.add(orientedAngle(a, b, c))
}
var bestQuad: List<Point>? = null
var bestScore = Double.POSITIVE_INFINITY
for (i in 0 until n) {
// Angles at vertices i, i+1 and i+2 (b, c and d below).
val triplet = listOf(angles[i % n], angles[(i + 1) % n], angles[(i + 2) % n])
if (triplet.all { it in angleMin..angleMax }) {
val a = points[(i + n - 1) % n]
val b = points[i]
val c = points[(i + 1) % n]
val d = points[(i + 2) % n]
val e = points[(i + 3) % n]
// Fourth corner: where line (a, b) meets line (d, e).
val inter = lineIntersection2(a, b, d, e) ?: continue
val quad = listOf(b, c, d, inter)
// ensure inside image bounds
if (quad.any { it.x < 0 || it.x >= imgWidth || it.y < 0 || it.y >= imgHeight }) continue
// ensure convex
if (!isConvex(quad)) continue
// Lower score means corners closer to right angles.
val score = quadAngleError(quad)
if (score < bestScore) {
bestScore = score
bestQuad = quad
}
}
}
return bestQuad
}
/**
 * Unsigned angle between vectors [v1] and [v2], in degrees.
 *
 * A small epsilon is added to each norm so that a zero-length vector does not cause a division
 * by zero.
 */
fun angleBetween(v1: Point, v2: Point): Float {
    val length1 = sqrt(v1.x * v1.x + v1.y * v1.y) + 1e-9f
    val length2 = sqrt(v2.x * v2.x + v2.y * v2.y) + 1e-9f
    val cosine = ((v1.x * v2.x + v1.y * v2.y) / (length1 * length2)).coerceIn(-1.0, 1.0)
    return Math.toDegrees(acos(cosine)).toFloat()
}
/**
 * Oriented angle at vertex [b] between the vectors `b -> a` and `b -> c`, in degrees.
 *
 * The result is `360 - angle` when the cross product of the two vectors is negative, and the
 * unsigned angle otherwise.
 */
fun orientedAngle(a: Point, b: Point, c: Point): Double {
    val ax = a.x - b.x
    val ay = a.y - b.y
    val cx = c.x - b.x
    val cy = c.y - b.y

    val lengthA = sqrt(ax * ax + ay * ay) + 1e-9f
    val lengthC = sqrt(cx * cx + cy * cy) + 1e-9f
    val cosine = ((ax * cx + ay * cy) / (lengthA * lengthC)).coerceIn(-1.0, 1.0)
    val unsignedAngle = Math.toDegrees(acos(cosine))

    val cross = ax * cy - ay * cx
    return if (cross < 0) 360.0 - unsignedAngle else unsignedAngle
}
/**
 * Intersection of the line through [p1] and [p2] with the line through [p3] and [p4].
 *
 * @return the intersection point, or `null` when the determinant is smaller than `1e-6f` in
 *   absolute value (lines parallel or nearly so).
 */
fun lineIntersection2(p1: Point, p2: Point, p3: Point, p4: Point): Point? {
    val denom = (p1.x - p2.x) * (p3.y - p4.y) - (p1.y - p2.y) * (p3.x - p4.x)
    if (abs(denom) < 1e-6f) return null

    val cross12 = (p1.x * p2.y - p1.y * p2.x)
    val cross34 = (p3.x * p4.y - p3.y * p4.x)
    return Point(
        (cross12 * (p3.x - p4.x) - (p1.x - p2.x) * cross34) / denom,
        (cross12 * (p3.y - p4.y) - (p1.y - p2.y) * cross34) / denom
    )
}
/**
 * Sum, over the four corners of [quad], of the absolute deviation of the corner angle from
 * 90 degrees.
 */
fun quadAngleError(quad: List<Point>): Double =
    (0 until 4).sumOf { i ->
        val previous = quad[(i + 3) % 4]
        val corner = quad[i]
        val next = quad[(i + 1) % 4]
        val cornerAngle = angleBetween(
            Point(previous.x - corner.x, previous.y - corner.y),
            Point(next.x - corner.x, next.y - corner.y)
        )
        abs(cornerAngle - 90.0)
    }
/**
 * Tells whether [quad] is convex: the cross products of consecutive edges must never have
 * opposite signs (zero cross products are ignored).
 *
 * @return `false` when [quad] does not have exactly 4 points or when two turns have opposite signs.
 */
fun isConvex(quad: List<Point>): Boolean {
    if (quad.size != 4) return false

    var turnDirection = 0
    for (i in quad.indices) {
        val a = quad[i]
        val b = quad[(i + 1) % 4]
        val c = quad[(i + 2) % 4]
        val cross = (b.x - a.x) * (c.y - b.y) - (b.y - a.y) * (c.x - b.x)
        val direction = cross.sign.toInt()
        when {
            direction == 0 -> Unit // collinear edges: no information
            turnDirection == 0 -> turnDirection = direction
            direction != turnDirection -> return false
        }
    }
    return true
}