New quad algorithm: identify edges from contour orientation (#130)
Goal: improve the precision of automatic document cropping by switching from the Douglas-Peucker algorithm (OpenCV's approxPolyDP) combined with a heuristic for documents missing a corner, to an algorithm that looks for edges.
* New quad algorithm: identify edges from contour orientation
* Performance optimization: reduce the number of calls to trigonometric functions
* Performance: use a single threshold for live analysis
* Fix orientation of debug mask and compute it only if required
* Exclude quads that go out of the frame
This commit is contained in:
committed by
GitHub
parent
cf196576fe
commit
343495dafe
@@ -29,6 +29,7 @@ import kotlinx.coroutines.sync.Mutex
|
|||||||
import kotlinx.coroutines.sync.withLock
|
import kotlinx.coroutines.sync.withLock
|
||||||
import kotlinx.coroutines.withContext
|
import kotlinx.coroutines.withContext
|
||||||
import org.fairscan.app.data.Logger
|
import org.fairscan.app.data.Logger
|
||||||
|
import org.fairscan.imageprocessing.ImageSize
|
||||||
import org.fairscan.imageprocessing.Mask
|
import org.fairscan.imageprocessing.Mask
|
||||||
import org.opencv.core.CvType
|
import org.opencv.core.CvType
|
||||||
import org.opencv.core.Mat
|
import org.opencv.core.Mat
|
||||||
@@ -39,7 +40,6 @@ import org.tensorflow.lite.support.common.ops.NormalizeOp
|
|||||||
import org.tensorflow.lite.support.image.ImageProcessor
|
import org.tensorflow.lite.support.image.ImageProcessor
|
||||||
import org.tensorflow.lite.support.image.TensorImage
|
import org.tensorflow.lite.support.image.TensorImage
|
||||||
import org.tensorflow.lite.support.image.ops.ResizeOp
|
import org.tensorflow.lite.support.image.ops.ResizeOp
|
||||||
import org.tensorflow.lite.support.image.ops.Rot90Op
|
|
||||||
import java.nio.ByteBuffer
|
import java.nio.ByteBuffer
|
||||||
import java.nio.ByteOrder
|
import java.nio.ByteOrder
|
||||||
|
|
||||||
@@ -73,13 +73,11 @@ class ImageSegmentationService(private val context: Context, private val logger:
|
|||||||
private fun runSegmentation(interpreter: Interpreter, bitmap: Bitmap, rotationDegrees: Int): SegmentationResult {
|
private fun runSegmentation(interpreter: Interpreter, bitmap: Bitmap, rotationDegrees: Int): SegmentationResult {
|
||||||
val startTime = SystemClock.uptimeMillis()
|
val startTime = SystemClock.uptimeMillis()
|
||||||
|
|
||||||
val rotation = -rotationDegrees / 90
|
|
||||||
val (_, h, w, _) = interpreter.getOutputTensor(0).shape()
|
val (_, h, w, _) = interpreter.getOutputTensor(0).shape()
|
||||||
val imageProcessor =
|
val imageProcessor =
|
||||||
ImageProcessor
|
ImageProcessor
|
||||||
.Builder()
|
.Builder()
|
||||||
.add(ResizeOp(h, w, ResizeOp.ResizeMethod.BILINEAR))
|
.add(ResizeOp(h, w, ResizeOp.ResizeMethod.BILINEAR))
|
||||||
.add(Rot90Op(rotation))
|
|
||||||
.add(NormalizeOp(127.5f, 127.5f)) // TODO check if it's correct
|
.add(NormalizeOp(127.5f, 127.5f)) // TODO check if it's correct
|
||||||
.build()
|
.build()
|
||||||
val tensorImage = TensorImage(DataType.FLOAT32)
|
val tensorImage = TensorImage(DataType.FLOAT32)
|
||||||
@@ -88,7 +86,11 @@ class ImageSegmentationService(private val context: Context, private val logger:
|
|||||||
val segmentResult = segment(interpreter, processedImage)
|
val segmentResult = segment(interpreter, processedImage)
|
||||||
|
|
||||||
val inferenceTime = SystemClock.uptimeMillis() - startTime
|
val inferenceTime = SystemClock.uptimeMillis() - startTime
|
||||||
return SegmentationResult(segmentResult, inferenceTime)
|
return SegmentationResult(
|
||||||
|
segmentResult,
|
||||||
|
ImageSize(bitmap.width, bitmap.height),
|
||||||
|
rotationDegrees,
|
||||||
|
inferenceTime)
|
||||||
}
|
}
|
||||||
|
|
||||||
suspend fun runSegmentationAndReturn(bitmap: Bitmap, rotationDegrees: Int): SegmentationResult? {
|
suspend fun runSegmentationAndReturn(bitmap: Bitmap, rotationDegrees: Int): SegmentationResult? {
|
||||||
@@ -163,10 +165,14 @@ class ImageSegmentationService(private val context: Context, private val logger:
|
|||||||
mask.put(0, 0, data)
|
mask.put(0, 0, data)
|
||||||
return mask
|
return mask
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fun maskSize() = ImageSize(width, height)
|
||||||
}
|
}
|
||||||
|
|
||||||
data class SegmentationResult(
|
data class SegmentationResult(
|
||||||
val segmentation: Segmentation,
|
val segmentation: Segmentation,
|
||||||
|
val originalSize: ImageSize,
|
||||||
|
val rotationDegrees: Int,
|
||||||
val inferenceTime: Long
|
val inferenceTime: Long
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -211,7 +211,7 @@ fun bindCameraUseCases(
|
|||||||
|
|
||||||
@Composable
|
@Composable
|
||||||
fun AnalysisOverlay(liveAnalysisState: LiveAnalysisState, debugMode: Boolean) {
|
fun AnalysisOverlay(liveAnalysisState: LiveAnalysisState, debugMode: Boolean) {
|
||||||
val binaryMask = liveAnalysisState.binaryMask ?: return
|
val maskSize = liveAnalysisState.maskSize ?: return
|
||||||
val targetQuad = liveAnalysisState.stableQuad
|
val targetQuad = liveAnalysisState.stableQuad
|
||||||
var displayedQuad by remember { mutableStateOf<Quad?>(null) }
|
var displayedQuad by remember { mutableStateOf<Quad?>(null) }
|
||||||
val quadColor = MaterialTheme.colorScheme.primary
|
val quadColor = MaterialTheme.colorScheme.primary
|
||||||
@@ -233,14 +233,15 @@ fun AnalysisOverlay(liveAnalysisState: LiveAnalysisState, debugMode: Boolean) {
|
|||||||
|
|
||||||
Canvas(modifier = Modifier.fillMaxSize()) {
|
Canvas(modifier = Modifier.fillMaxSize()) {
|
||||||
if (debugMode) {
|
if (debugMode) {
|
||||||
drawMask(this, binaryMask)
|
val binaryMask = liveAnalysisState.binaryMaskProvider.invoke()
|
||||||
|
binaryMask?.let { drawMask(this, it) }
|
||||||
}
|
}
|
||||||
displayedQuad?.let { quad ->
|
displayedQuad?.let { quad ->
|
||||||
val scaledQuad = quad.scaledTo(
|
val scaledQuad = quad.scaledTo(
|
||||||
fromWidth = binaryMask.width,
|
fromWidth = maskSize.width,
|
||||||
fromHeight = binaryMask.height,
|
fromHeight = maskSize.height,
|
||||||
toWidth = size.width.toInt(),
|
toWidth = size.width.toDouble(),
|
||||||
toHeight = size.height.toInt()
|
toHeight = size.height.toDouble()
|
||||||
)
|
)
|
||||||
scaledQuad.edges().forEach {
|
scaledQuad.edges().forEach {
|
||||||
drawLine(quadColor, it.from.toOffset(), it.to.toOffset(), 10.0f)
|
drawLine(quadColor, it.from.toOffset(), it.to.toOffset(), 10.0f)
|
||||||
|
|||||||
@@ -16,12 +16,14 @@ package org.fairscan.app.ui.screens.camera
|
|||||||
|
|
||||||
import android.graphics.Bitmap
|
import android.graphics.Bitmap
|
||||||
import androidx.compose.runtime.Immutable
|
import androidx.compose.runtime.Immutable
|
||||||
|
import org.fairscan.imageprocessing.ImageSize
|
||||||
import org.fairscan.imageprocessing.Quad
|
import org.fairscan.imageprocessing.Quad
|
||||||
|
|
||||||
@Immutable
|
@Immutable
|
||||||
data class LiveAnalysisState(
|
data class LiveAnalysisState(
|
||||||
val inferenceTime: Long = 0L,
|
val inferenceTime: Long = 0L,
|
||||||
val binaryMask: Bitmap? = null,
|
val maskSize: ImageSize? = null,
|
||||||
|
val binaryMaskProvider: () -> Bitmap? = { -> null },
|
||||||
val documentQuad: Quad? = null,
|
val documentQuad: Quad? = null,
|
||||||
val stableQuad: Quad? = null,
|
val stableQuad: Quad? = null,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -15,6 +15,7 @@
|
|||||||
package org.fairscan.app.ui.screens.camera
|
package org.fairscan.app.ui.screens.camera
|
||||||
|
|
||||||
import android.graphics.Bitmap
|
import android.graphics.Bitmap
|
||||||
|
import android.graphics.Matrix
|
||||||
import androidx.camera.core.ImageProxy
|
import androidx.camera.core.ImageProxy
|
||||||
import androidx.core.graphics.createBitmap
|
import androidx.core.graphics.createBitmap
|
||||||
import androidx.lifecycle.ViewModel
|
import androidx.lifecycle.ViewModel
|
||||||
@@ -72,16 +73,29 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
|
|||||||
imageSegmentationService.segmentation
|
imageSegmentationService.segmentation
|
||||||
.filterNotNull()
|
.filterNotNull()
|
||||||
.collect { result ->
|
.collect { result ->
|
||||||
// TODO Should we really call toBinaryMask if it's used only in debug mode?
|
val binaryMaskProvider = { ->
|
||||||
val binaryMask = result.segmentation.toBinaryMask()
|
var binaryMask: Bitmap = result.segmentation.toBinaryMask()
|
||||||
|
if (result.rotationDegrees != 0) {
|
||||||
|
binaryMask = rotateBitmap(binaryMask, result.rotationDegrees.toFloat())
|
||||||
|
}
|
||||||
|
binaryMask
|
||||||
|
}
|
||||||
|
|
||||||
val rawQuad = detectDocumentQuad(
|
val rawQuad = detectDocumentQuad(
|
||||||
result.segmentation,
|
result.segmentation,
|
||||||
|
result.originalSize,
|
||||||
isLiveAnalysis = true
|
isLiveAnalysis = true
|
||||||
|
)?.rotate90(
|
||||||
|
result.rotationDegrees / 90,
|
||||||
|
result.segmentation.width,
|
||||||
|
result.segmentation.height
|
||||||
)
|
)
|
||||||
|
|
||||||
val stableQuad = quadStabilizer.update(rawQuad)
|
val stableQuad = quadStabilizer.update(rawQuad)
|
||||||
_liveAnalysisState.value = LiveAnalysisState(
|
_liveAnalysisState.value = LiveAnalysisState(
|
||||||
inferenceTime = result.inferenceTime,
|
inferenceTime = result.inferenceTime,
|
||||||
binaryMask = binaryMask,
|
binaryMaskProvider = binaryMaskProvider,
|
||||||
|
maskSize = result.segmentation.maskSize(),
|
||||||
documentQuad = rawQuad,
|
documentQuad = rawQuad,
|
||||||
stableQuad = stableQuad,
|
stableQuad = stableQuad,
|
||||||
)
|
)
|
||||||
@@ -145,13 +159,13 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
|
|||||||
|
|
||||||
private suspend fun processCapturedImage(
|
private suspend fun processCapturedImage(
|
||||||
source: Bitmap,
|
source: Bitmap,
|
||||||
rotationDegrees: Int
|
rotationDegrees: Int,
|
||||||
): CapturedPage? = withContext(Dispatchers.IO) {
|
): CapturedPage? = withContext(Dispatchers.IO) {
|
||||||
var result: CapturedPage? = null
|
var result: CapturedPage? = null
|
||||||
val segmentation = imageSegmentationService.runSegmentationAndReturn(source, 0)
|
val segmentation = imageSegmentationService.runSegmentationAndReturn(source, 0)
|
||||||
if (segmentation != null) {
|
if (segmentation != null) {
|
||||||
val mask = segmentation.segmentation
|
val mask = segmentation.segmentation
|
||||||
val quad = detectDocumentQuad(mask, isLiveAnalysis = false)
|
val quad = detectDocumentQuad(mask, segmentation.originalSize, isLiveAnalysis = false)
|
||||||
if (quad != null) {
|
if (quad != null) {
|
||||||
val resizedQuad = quad.scaledTo(mask.width, mask.height, source.width, source.height)
|
val resizedQuad = quad.scaledTo(mask.width, mask.height, source.width, source.height)
|
||||||
result = extractDocumentFromBitmap(source, resizedQuad, rotationDegrees, mask)
|
result = extractDocumentFromBitmap(source, resizedQuad, rotationDegrees, mask)
|
||||||
@@ -230,3 +244,9 @@ fun toBitmap(bgr: Mat): Bitmap {
|
|||||||
rgba.release()
|
rgba.release()
|
||||||
return bmp
|
return bmp
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns [source] rotated by [angle] degrees.
 *
 * The rotation is expressed as a [Matrix] and applied to the full extent of
 * [source]; the last argument of `Bitmap.createBitmap` (`filter`) is `true`.
 */
fun rotateBitmap(source: Bitmap, angle: Float): Bitmap {
    val rotation = Matrix().apply { postRotate(angle) }
    return Bitmap.createBitmap(source, 0, 0, source.width, source.height, rotation, true)
}
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ import org.fairscan.imageprocessing.detectDocumentQuad
|
|||||||
import org.fairscan.imageprocessing.extractDocument
|
import org.fairscan.imageprocessing.extractDocument
|
||||||
import org.fairscan.imageprocessing.isColoredDocument
|
import org.fairscan.imageprocessing.isColoredDocument
|
||||||
import org.fairscan.imageprocessing.scaledTo
|
import org.fairscan.imageprocessing.scaledTo
|
||||||
|
import org.fairscan.imageprocessing.toImageSize
|
||||||
import org.opencv.imgcodecs.Imgcodecs
|
import org.opencv.imgcodecs.Imgcodecs
|
||||||
import java.io.File
|
import java.io.File
|
||||||
|
|
||||||
@@ -57,7 +58,7 @@ object ColorDetectionEvaluator {
|
|||||||
|
|
||||||
val mask = MatMask(maskMat)
|
val mask = MatMask(maskMat)
|
||||||
|
|
||||||
val quad = detectDocumentQuad(mask, isLiveAnalysis = false)
|
val quad = detectDocumentQuad(mask, mat.size().toImageSize(), isLiveAnalysis = false)
|
||||||
?.scaledTo(mask.width, mask.height, mat.width(), mat.height())
|
?.scaledTo(mask.width, mask.height, mat.width(), mat.height())
|
||||||
|
|
||||||
if (quad == null) continue
|
if (quad == null) continue
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ import org.fairscan.imageprocessing.detectDocumentQuad
|
|||||||
import org.fairscan.imageprocessing.extractDocument
|
import org.fairscan.imageprocessing.extractDocument
|
||||||
import org.fairscan.imageprocessing.isColoredDocument
|
import org.fairscan.imageprocessing.isColoredDocument
|
||||||
import org.fairscan.imageprocessing.scaledTo
|
import org.fairscan.imageprocessing.scaledTo
|
||||||
|
import org.fairscan.imageprocessing.toImageSize
|
||||||
import org.opencv.core.Mat
|
import org.opencv.core.Mat
|
||||||
import org.opencv.imgcodecs.Imgcodecs
|
import org.opencv.imgcodecs.Imgcodecs
|
||||||
import java.io.File
|
import java.io.File
|
||||||
@@ -68,7 +69,8 @@ object DatasetEvaluator {
|
|||||||
|
|
||||||
val mask = MatMask(maskMat)
|
val mask = MatMask(maskMat)
|
||||||
|
|
||||||
val quad = detectDocumentQuad(mask, isLiveAnalysis = false)
|
val originalSize = inputMat.size().toImageSize()
|
||||||
|
val quad = detectDocumentQuad(mask, originalSize, isLiveAnalysis = false)
|
||||||
?.scaledTo(mask.width, mask.height, inputMat.width(), inputMat.height())
|
?.scaledTo(mask.width, mask.height, inputMat.width(), inputMat.height())
|
||||||
|
|
||||||
val corrected: Mat? = if (quad != null) {
|
val corrected: Mat? = if (quad != null) {
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ import org.fairscan.imageprocessing.detectDocumentQuad
|
|||||||
import org.fairscan.imageprocessing.extractDocument
|
import org.fairscan.imageprocessing.extractDocument
|
||||||
import org.fairscan.imageprocessing.isColoredDocument
|
import org.fairscan.imageprocessing.isColoredDocument
|
||||||
import org.fairscan.imageprocessing.scaledTo
|
import org.fairscan.imageprocessing.scaledTo
|
||||||
|
import org.fairscan.imageprocessing.toImageSize
|
||||||
import org.opencv.core.MatOfInt
|
import org.opencv.core.MatOfInt
|
||||||
import org.opencv.imgcodecs.Imgcodecs
|
import org.opencv.imgcodecs.Imgcodecs
|
||||||
import java.io.File
|
import java.io.File
|
||||||
@@ -56,7 +57,8 @@ object ExportQualityEvaluator {
|
|||||||
|
|
||||||
val mask = MatMask(maskMat)
|
val mask = MatMask(maskMat)
|
||||||
|
|
||||||
val quad = detectDocumentQuad(mask, isLiveAnalysis = false)
|
val originalSize = sourceMat.size().toImageSize()
|
||||||
|
val quad = detectDocumentQuad(mask, originalSize, isLiveAnalysis = false)
|
||||||
?.scaledTo(mask.width, mask.height, sourceMat.width(), sourceMat.height())
|
?.scaledTo(mask.width, mask.height, sourceMat.width(), sourceMat.height())
|
||||||
if (quad == null) {
|
if (quad == null) {
|
||||||
println("Failed to detect quad for $imgName")
|
println("Failed to detect quad for $imgName")
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ import nu.pattern.OpenCV
|
|||||||
import org.fairscan.imageprocessing.detectDocumentQuad
|
import org.fairscan.imageprocessing.detectDocumentQuad
|
||||||
import org.fairscan.imageprocessing.scaledTo
|
import org.fairscan.imageprocessing.scaledTo
|
||||||
import org.fairscan.imageprocessing.toCv
|
import org.fairscan.imageprocessing.toCv
|
||||||
|
import org.fairscan.imageprocessing.toImageSize
|
||||||
import org.opencv.core.Core
|
import org.opencv.core.Core
|
||||||
import org.opencv.core.Mat
|
import org.opencv.core.Mat
|
||||||
import org.opencv.core.Scalar
|
import org.opencv.core.Scalar
|
||||||
@@ -63,7 +64,8 @@ object QuadDetectionEvaluator {
|
|||||||
|
|
||||||
val mask = MatMask(maskMat)
|
val mask = MatMask(maskMat)
|
||||||
|
|
||||||
val quad = detectDocumentQuad(mask, isLiveAnalysis = false)
|
val originalSize = inputMat.size().toImageSize()
|
||||||
|
val quad = detectDocumentQuad(mask, originalSize, isLiveAnalysis = false)
|
||||||
?.scaledTo(mask.width, mask.height, inputMat.width(), inputMat.height())
|
?.scaledTo(mask.width, mask.height, inputMat.width(), inputMat.height())
|
||||||
|
|
||||||
val inputOut = File(outputDir, "${e.name}_input.jpg")
|
val inputOut = File(outputDir, "${e.name}_input.jpg")
|
||||||
|
|||||||
@@ -14,10 +14,11 @@
|
|||||||
*/
|
*/
|
||||||
package org.fairscan.imageprocessing
|
package org.fairscan.imageprocessing
|
||||||
|
|
||||||
import org.fairscan.imageprocessing.quad.detectDocumentQuadFromProbmap
|
import org.fairscan.imageprocessing.quad.findQuadFromContourOrientation
|
||||||
import org.fairscan.imageprocessing.quad.findQuadFromRightAngles
|
|
||||||
import org.fairscan.imageprocessing.quad.minAreaRect
|
import org.fairscan.imageprocessing.quad.minAreaRect
|
||||||
|
import org.fairscan.imageprocessing.quad.scoreQuadAgainstProbmap
|
||||||
import org.opencv.core.Core
|
import org.opencv.core.Core
|
||||||
|
import org.opencv.core.CvType
|
||||||
import org.opencv.core.Mat
|
import org.opencv.core.Mat
|
||||||
import org.opencv.core.MatOfPoint
|
import org.opencv.core.MatOfPoint
|
||||||
import org.opencv.core.MatOfPoint2f
|
import org.opencv.core.MatOfPoint2f
|
||||||
@@ -31,35 +32,78 @@ interface Mask {
|
|||||||
fun toMat(): Mat
|
fun toMat(): Mat
|
||||||
}
|
}
|
||||||
|
|
||||||
fun detectDocumentQuad(mask: Mask, isLiveAnalysis: Boolean, minQuadAreaRatio: Double = 0.02): Quad? {
|
fun detectDocumentQuad(mask: Mask, originalSize: ImageSize, isLiveAnalysis: Boolean): Quad? {
|
||||||
val mat = mask.toMat()
|
val mat = mask.toMat()
|
||||||
val (biggest: MatOfPoint2f?, area) = biggestContour(mat)
|
// Best thresholds on test dataset: {0.95=146, 0.85=39, 0.75=35, 0.90=8, 0.70=1, 0.35=1}
|
||||||
var vertices: List<Point>?
|
|
||||||
if (biggest != null && biggest.total() == 4L && area > mask.width * mask.height * minQuadAreaRatio) {
|
|
||||||
vertices = biggest.toList()?.map { Point(it.x, it.y) }
|
|
||||||
} else {
|
|
||||||
|
|
||||||
// Fallback 1: adjust threshold
|
|
||||||
val thresholds =
|
val thresholds =
|
||||||
if (isLiveAnalysis) listOf(25.0, 50.0, 75.0) else (0..12).map { 0.2 + it * 0.05 }
|
if (isLiveAnalysis) listOf(0.9) else listOf(0.5, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95)
|
||||||
vertices = detectDocumentQuadFromProbmap(mat, thresholds)
|
var vertices = findQuadFromOrientationWithAdaptiveThreshold(mat, originalSize, thresholds)
|
||||||
?.map { Point(it.x, it.y) }
|
?.map { Point(it.x, it.y) }
|
||||||
if (vertices == null && biggest != null && biggest.total() > 4) {
|
|
||||||
|
|
||||||
// Fallback 2: look for right angles
|
|
||||||
val polygon = biggest.toList().map { Point(it.x, it.y) }
|
|
||||||
vertices = findQuadFromRightAngles(polygon, mask.width, mask.height)
|
|
||||||
if (vertices == null && !isLiveAnalysis) {
|
if (vertices == null && !isLiveAnalysis) {
|
||||||
|
// Fallback: bounding rectangle
|
||||||
// Fallback 3: bounding rectangle
|
val biggest = biggestContour(mat)
|
||||||
|
if (biggest != null) {
|
||||||
|
val polygon = biggest.toList().map { Point(it.x, it.y) }
|
||||||
vertices = minAreaRect(polygon, mask.width, mask.height)
|
vertices = minAreaRect(polygon, mask.width, mask.height)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
return if (vertices?.size == 4) createQuad(vertices) else null
|
return if (vertices?.size == 4) createQuad(vertices) else null
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun biggestContour(mat: Mat): Pair<MatOfPoint2f?, Double> {
|
/**
 * Looks for the document quad by binarizing the probability map at each of the given
 * [thresholds] and keeping the candidate that scores best against the probability map.
 *
 * For every threshold, the smoothed map is binarized, closed with a 5x5 elliptical kernel
 * and handed to [findQuadFromOrientation]. Candidates with a vertex outside the mask frame
 * are discarded before scoring.
 *
 * @param maskMat probability map; it is multiplied by 255 when converted to 8 bits, so its
 *   values are presumably in [0, 1] (TODO confirm against the segmentation output)
 * @param originalSize size of the image the mask was computed from; forwarded to
 *   [findQuadFromOrientation]
 * @param thresholds binarization thresholds, on the same scale as the values of [maskMat]
 * @return the vertices of the best-scoring quad, in mask coordinates, or `null` when no
 *   threshold yields a valid quad with a positive score
 */
fun findQuadFromOrientationWithAdaptiveThreshold(
    maskMat: Mat, originalSize: ImageSize, thresholds: List<Double>
): List<org.opencv.core.Point>? {
    val probmapU8 = Mat()
    val probmapSmooth = Mat()
    // The structuring element does not depend on the threshold: build it once and
    // release it at the end instead of allocating (and leaking) one per iteration.
    val kernel = Imgproc.getStructuringElement(Imgproc.MORPH_ELLIPSE, Size(5.0, 5.0))
    // findQuadFromOrientation() maps its result back to mask coordinates, so the
    // "inside the frame" check has to use the mask frame, not the original image size.
    val maskFrame = ImageSize(maskMat.cols(), maskMat.rows())

    try {
        maskMat.convertTo(probmapU8, CvType.CV_8U, 255.0)
        Imgproc.GaussianBlur(probmapU8, probmapSmooth, Size(3.0, 3.0), 0.0)

        var bestQuad: List<org.opencv.core.Point>? = null
        var bestScore = 0.0
        for (thr in thresholds) {
            val bin = Mat()
            try {
                Imgproc.threshold(probmapSmooth, bin, thr * 255.0, 255.0, Imgproc.THRESH_BINARY)
                Imgproc.morphologyEx(bin, bin, Imgproc.MORPH_CLOSE, kernel)

                val quad = findQuadFromOrientation(bin, originalSize)
                if (quad != null && isValidQuad(quad, maskFrame)) {
                    // Float copy of the probability map, only needed to score a candidate.
                    val probFloat = Mat()
                    try {
                        maskMat.convertTo(probFloat, CvType.CV_32F)
                        val score = scoreQuadAgainstProbmap(quad, probFloat, minQuadAreaRatio = 0.02)
                        if (score > bestScore) {
                            bestScore = score
                            bestQuad = quad
                        }
                    } finally {
                        probFloat.release()
                    }
                }
            } finally {
                bin.release()
            }
        }
        return bestQuad
    } finally {
        kernel.release()
        probmapSmooth.release()
        probmapU8.release()
    }
}
|
||||||
|
|
||||||
|
/**
 * Tells whether every vertex of [quad] lies inside the frame described by [originalSize].
 *
 * A vertex is accepted when `0 <= x <= width` and `0 <= y <= height` (bounds included).
 * An empty [quad] is considered valid, as there is no vertex to reject.
 */
fun isValidQuad(quad: List<org.opencv.core.Point>, originalSize: ImageSize): Boolean {
    fun withinBounds(value: Double, upper: Double): Boolean = value >= 0 && value <= upper

    for (vertex in quad) {
        val insideFrame = withinBounds(vertex.x, originalSize.width)
                && withinBounds(vertex.y, originalSize.height)
        if (!insideFrame) return false
    }
    return true
}
|
||||||
|
|
||||||
|
/**
 * Finds a quad in a binary mask from the orientation of its biggest contour.
 *
 * The contour points are scaled to the coordinate space of [originalSize] before being
 * handed to [findQuadFromContourOrientation]; the resulting vertices are then scaled back
 * to the coordinate space of [maskMat].
 *
 * @return the quad vertices in mask coordinates, or `null` when there is no contour or no
 *   quad could be built from it
 */
fun findQuadFromOrientation(maskMat: Mat, originalSize: ImageSize): List<org.opencv.core.Point>? {
    val contour = biggestContour(maskMat) ?: return null

    val scaleX = originalSize.width / maskMat.size().width
    val scaleY = originalSize.height / maskMat.size().height

    val contourInOriginalSpace = contour.toList().map { point ->
        org.opencv.core.Point(point.x * scaleX, point.y * scaleY)
    }
    val quadInOriginalSpace = findQuadFromContourOrientation(contourInOriginalSpace)
        ?: return null

    return quadInOriginalSpace.map { vertex ->
        org.opencv.core.Point(vertex.x / scaleX, vertex.y / scaleY)
    }
}
|
||||||
|
|
||||||
|
fun biggestContour(mat: Mat): MatOfPoint? {
|
||||||
val refinedMask = refineMask(mat)
|
val refinedMask = refineMask(mat)
|
||||||
|
|
||||||
val blurred = Mat()
|
val blurred = Mat()
|
||||||
@@ -70,24 +114,19 @@ private fun biggestContour(mat: Mat): Pair<MatOfPoint2f?, Double> {
|
|||||||
|
|
||||||
val contours = mutableListOf<MatOfPoint>()
|
val contours = mutableListOf<MatOfPoint>()
|
||||||
val hierarchy = Mat()
|
val hierarchy = Mat()
|
||||||
Imgproc.findContours(edges, contours, hierarchy, Imgproc.RETR_LIST, Imgproc.CHAIN_APPROX_SIMPLE)
|
Imgproc.findContours(edges, contours, hierarchy, Imgproc.RETR_LIST, Imgproc.CHAIN_APPROX_NONE)
|
||||||
|
|
||||||
var biggest: MatOfPoint2f? = null
|
var biggest: MatOfPoint? = null
|
||||||
var maxArea = 0.0
|
var maxArea = 0.0
|
||||||
|
|
||||||
for (contour in contours) {
|
for (contour in contours) {
|
||||||
val contour2f = MatOfPoint2f(*contour.toArray())
|
val area = abs(Imgproc.contourArea(contour))
|
||||||
val peri = Imgproc.arcLength(contour2f, true)
|
|
||||||
val approx = MatOfPoint2f()
|
|
||||||
Imgproc.approxPolyDP(contour2f, approx, 0.02 * peri, true)
|
|
||||||
|
|
||||||
val area = abs(Imgproc.contourArea(approx))
|
|
||||||
if (area > maxArea) {
|
if (area > maxArea) {
|
||||||
maxArea = area
|
maxArea = area
|
||||||
biggest = approx
|
biggest = contour
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return Pair(biggest, maxArea)
|
return biggest
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -171,3 +210,6 @@ fun Point.toCv(): org.opencv.core.Point {
|
|||||||
return org.opencv.core.Point(x, y)
|
return org.opencv.core.Point(x, y)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Converts this OpenCV [Size] to an [ImageSize] with the same width and height. */
fun Size.toImageSize(): ImageSize = ImageSize(width, height)
|
||||||
|
|||||||
@@ -103,3 +103,7 @@ fun Quad.scaledTo(fromWidth: Int, fromHeight: Int, toWidth: Int, toHeight: Int):
|
|||||||
fun Point.scaled(scaleX: Double, scaleY: Double): Point {
|
fun Point.scaled(scaleX: Double, scaleY: Double): Point {
|
||||||
return Point((x * scaleX), (y * scaleY))
|
return Point((x * scaleX), (y * scaleY))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Width and height of an image, stored as doubles. */
data class ImageSize(val width: Double, val height: Double) {

    /** Builds an [ImageSize] from integer dimensions. */
    constructor(width: Int, height: Int) : this(
        width = width.toDouble(),
        height = height.toDouble(),
    )
}
|
||||||
|
|||||||
@@ -1,129 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright 2025-2026 Pierre-Yves Nicolas
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify it
|
|
||||||
* under the terms of the GNU General Public License as published by the Free
|
|
||||||
* Software Foundation, either version 3 of the License, or (at your option)
|
|
||||||
* any later version.
|
|
||||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
|
||||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
|
||||||
* more details.
|
|
||||||
* You should have received a copy of the GNU General Public License along with
|
|
||||||
* this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
package org.fairscan.imageprocessing.quad
|
|
||||||
|
|
||||||
import org.opencv.core.Mat
|
|
||||||
import org.opencv.core.CvType
|
|
||||||
import org.opencv.core.Size
|
|
||||||
import org.opencv.core.Point
|
|
||||||
import org.opencv.core.Scalar
|
|
||||||
import org.opencv.core.MatOfPoint
|
|
||||||
import org.opencv.core.MatOfPoint2f
|
|
||||||
import org.opencv.core.Core
|
|
||||||
import org.opencv.imgproc.Imgproc
|
|
||||||
import kotlin.math.abs
|
|
||||||
|
|
||||||
/**
 * Looks for a threshold for which a quad can be found in the mask.
 *
 * The probability map is converted to 8 bits and smoothed, then binarized with Otsu's
 * method (when [useOtsu] is set) and with each value of [thresholds]. Every binarization
 * that yields a quad is scored with [scoreQuadAgainstProbmap]; the best-scoring quad wins.
 *
 * @param probmap probability map; it is multiplied by 255 when converted to 8 bits, so its
 *   values are presumably in [0, 1] (TODO confirm)
 * @param thresholds binarization thresholds, on the same scale as the values of [probmap]
 * @param useOtsu whether to also try an Otsu binarization before the threshold sweep
 * @param minQuadAreaRatio forwarded to [findQuadFromBinaryMask]
 * @return the vertices of the best-scoring quad, or `null` if no quad has a positive score
 */
fun detectDocumentQuadFromProbmap(
    probmap: Mat,
    thresholds: List<Double>,
    useOtsu: Boolean = true,
    minQuadAreaRatio: Double = 0.02
): List<Point>? {
    val probmapU8 = Mat()
    val probmapSmooth = Mat()
    // Float copy used for scoring: identical for every candidate, so convert it once.
    val probFloat = Mat()
    // Structuring element shared by every step of the threshold sweep.
    val kernel = Imgproc.getStructuringElement(Imgproc.MORPH_ELLIPSE, Size(5.0, 5.0))

    try {
        probmap.convertTo(probmapU8, CvType.CV_8U, 255.0)
        Imgproc.GaussianBlur(probmapU8, probmapSmooth, Size(3.0, 3.0), 0.0)
        probmap.convertTo(probFloat, CvType.CV_32F)

        var bestScore = 0.0
        var bestQuad: List<Point>? = null

        // Scores the quad found in a binary mask (if any) and keeps it when it is the best so far.
        fun consider(binary: Mat) {
            val quad = findQuadFromBinaryMask(binary, minQuadAreaRatio) ?: return
            val score = scoreQuadAgainstProbmap(quad, probFloat)
            if (score > bestScore) {
                bestScore = score
                bestQuad = quad
            }
        }

        // 1) Otsu
        if (useOtsu) {
            val otsu = Mat()
            try {
                Imgproc.threshold(
                    probmapSmooth, otsu, 0.0, 255.0, Imgproc.THRESH_BINARY + Imgproc.THRESH_OTSU
                )
                consider(otsu)
            } finally {
                otsu.release()
            }
        }

        // 2) Threshold sweep
        for (thr in thresholds) {
            val bin = Mat()
            try {
                Imgproc.threshold(probmapSmooth, bin, thr * 255.0, 255.0, Imgproc.THRESH_BINARY)
                Imgproc.morphologyEx(bin, bin, Imgproc.MORPH_CLOSE, kernel)
                consider(bin)
            } finally {
                bin.release()
            }
        }

        return bestQuad
    } finally {
        kernel.release()
        probFloat.release()
        probmapSmooth.release()
        probmapU8.release()
    }
}
|
|
||||||
|
|
||||||
/**
 * Fills [polygon] into a new single-channel 8-bit mask of the given [size].
 *
 * Pixels covered by the polygon are set to 1, all other pixels are 0.
 * The caller owns the returned [Mat] and is responsible for releasing it.
 */
fun makePolygonMask(size: Size, polygon: List<Point>): Mat {
    val mask = Mat.zeros(size, CvType.CV_8U)
    val pts = MatOfPoint(*polygon.toTypedArray())
    try {
        Imgproc.fillPoly(mask, listOf(pts), Scalar(1.0))
    } finally {
        // The point matrix is only needed while drawing.
        pts.release()
    }
    return mask
}
|
|
||||||
|
|
||||||
/**
 * Computes a score for [quad] against [probmap].
 *
 * The score is `meanProb * (0.7 + 0.3 * areaRatio)` where `meanProb` is the mean value of
 * [probmap] over the pixels covered by the quad and `areaRatio` is the fraction of the map
 * covered by the quad.
 *
 * @param quad polygon vertices, in the coordinate space of [probmap]
 * @param probmap map to score against; it is multiplied element-wise with a `CV_32F` mask,
 *   so it is presumably expected to be `CV_32F` as well (TODO confirm)
 * @return the score, or `0.0` when the quad covers no pixel at all
 */
fun scoreQuadAgainstProbmap(quad: List<Point>, probmap: Mat): Double {
    val mask = makePolygonMask(probmap.size(), quad)
    val maskFloat = Mat()
    val masked = Mat()
    try {
        mask.convertTo(maskFloat, CvType.CV_32F)
        Core.multiply(probmap, maskFloat, masked)

        // The mask holds 0/1 values, so its sum is the number of covered pixels.
        val coveredPixels = Core.sumElems(maskFloat).`val`[0]
        // Avoid 0/0 (NaN) for a quad that covers nothing.
        if (coveredPixels == 0.0) return 0.0

        val meanProb = Core.sumElems(masked).`val`[0] / coveredPixels
        val areaRatio = coveredPixels / (probmap.rows() * probmap.cols())
        return meanProb * (0.7 + 0.3 * areaRatio)
    } finally {
        masked.release()
        maskFloat.release()
        mask.release()
    }
}
|
|
||||||
|
|
||||||
/**
 * Finds the largest quadrilateral in a binary mask.
 *
 * The mask is blurred, its edges are extracted with Canny and every contour is simplified
 * with `approxPolyDP` (tolerance: 2% of the contour perimeter). Among the contours that
 * simplify to exactly four vertices, the one with the largest area is kept.
 *
 * @param binMask binary mask to search
 * @param minQuadAreaRatio minimum area of the quad, as a fraction of the mask area
 * @return the four vertices of the largest quad, or `null` when none is found or when the
 *   largest one does not exceed [minQuadAreaRatio] of the mask area
 */
fun findQuadFromBinaryMask(binMask: Mat, minQuadAreaRatio: Double = 0.02): List<Point>? {
    val blurred = Mat()
    val edges = Mat()
    val hierarchy = Mat()
    val contours = mutableListOf<MatOfPoint>()

    try {
        Imgproc.GaussianBlur(binMask, blurred, Size(5.0, 5.0), 0.0)
        Imgproc.Canny(blurred, edges, 75.0, 200.0)
        Imgproc.findContours(edges, contours, hierarchy, Imgproc.RETR_LIST, Imgproc.CHAIN_APPROX_SIMPLE)

        var biggest: List<Point>? = null
        var maxArea = 0.0
        for (cnt in contours) {
            val cnt2f = MatOfPoint2f(*cnt.toArray())
            val approx = MatOfPoint2f()
            try {
                val peri = Imgproc.arcLength(cnt2f, true)
                Imgproc.approxPolyDP(cnt2f, approx, 0.02 * peri, true)
                if (approx.rows() == 4) {
                    val area = abs(Imgproc.contourArea(approx))
                    if (area > maxArea) {
                        maxArea = area
                        // Copy the vertices out so that the native buffer can be released.
                        biggest = approx.toList()
                    }
                }
            } finally {
                approx.release()
                cnt2f.release()
            }
        }

        val totalArea = binMask.rows() * binMask.cols().toDouble()
        return if (maxArea > totalArea * minQuadAreaRatio) biggest else null
    } finally {
        contours.forEach { it.release() }
        hierarchy.release()
        edges.release()
        blurred.release()
    }
}
|
|
||||||
@@ -0,0 +1,308 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2025-2026 Pierre-Yves Nicolas
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License as published by the Free
|
||||||
|
* Software Foundation, either version 3 of the License, or (at your option)
|
||||||
|
* any later version.
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||||
|
* more details.
|
||||||
|
* You should have received a copy of the GNU General Public License along with
|
||||||
|
* this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package org.fairscan.imageprocessing.quad
|
||||||
|
|
||||||
|
import org.opencv.core.Point
|
||||||
|
import kotlin.math.abs
|
||||||
|
import kotlin.math.atan2
|
||||||
|
import kotlin.math.cos
|
||||||
|
import kotlin.math.hypot
|
||||||
|
import kotlin.math.sin
|
||||||
|
|
||||||
|
/**
 * Reconstructs a quadrilateral from the dominant sides of a contour.
 *
 * Instead of detecting corners (like Douglas-Peucker), this algorithm detects the four
 * dominant sides of the document by segmenting the contour according to stable edge
 * orientations, then fits lines and intersects them to reconstruct the quadrilateral.
 *
 * @param contour contour points, in order
 * @param smoothWindow window passed to `computeSmoothedAngles`
 * @param maxAngleVar angle tolerance (radians) passed to `extractSegments`
 * @param mergeAngle angle tolerance (radians) passed to `mergeSegments`
 * @param minSideLengthRatio minimum segment length passed to `extractSegments`, expressed
 *   as a fraction of the summed distance between consecutive contour points
 * @return the four intersections of consecutive fitted lines, or `null` when the contour
 *   has fewer than 20 points, when there are not exactly four dominant segments, or when
 *   two consecutive lines cannot be intersected
 */
fun findQuadFromContourOrientation(
    contour: List<Point>,
    smoothWindow: Int = 5,
    maxAngleVar: Double = Math.toRadians(5.0),
    mergeAngle: Double = Math.toRadians(7.0),
    minSideLengthRatio: Double = 0.02
): List<Point>? {
    if (contour.size < 20) return null

    val angles = computeSmoothedAngles(contour, smoothWindow)

    val perimeter = contour.zipWithNext().sumOf { (from, to) -> hypot(to.x - from.x, to.y - from.y) }
    val minLength = perimeter * minSideLengthRatio

    val sides = selectDominantSegments(
        mergeSegments(extractSegments(contour, angles, maxAngleVar, minLength), mergeAngle),
        maxCount = 4,
        minAngleSeparation = Math.toRadians(25.0)
    )
    if (sides.size != 4) return null

    val lines = sides.map { side ->
        val sidePoints = when {
            side.start < side.end -> contour.subList(side.start, side.end)
            // The segment wraps around the end of the contour.
            else -> contour.subList(side.start, contour.size) + contour.subList(0, side.end)
        }
        fitLine(sidePoints)
    }

    // Each corner is the intersection of a side with the next one (cyclically).
    return lines.indices.map { i ->
        intersectLines(lines[i], lines[(i + 1) % 4]) ?: return null
    }
}
|
||||||
|
|
||||||
|
/**
 * Wraps an angle in radians into the interval `(-π, π]`.
 *
 * Runs in constant time for any finite input. Non-finite input (NaN or
 * ±Infinity) yields NaN instead of looping forever.
 */
private fun normalizeAngle(a: Double): Double {
    if (!a.isFinite()) return Double.NaN
    // Fast path: already inside the target interval.
    if (a > -Math.PI && a <= Math.PI) return a

    val twoPi = 2 * Math.PI
    // IEEEremainder is exact and returns a value in [-π, π].
    val r = Math.IEEEremainder(a, twoPi)
    // Map the closed lower bound onto +π so that the interval is (-π, π].
    return if (r <= -Math.PI) r + twoPi else r
}
|
||||||
|
|
||||||
|
/**
 * Absolute angular distance between [a] and [b], in radians.
 *
 * The difference is wrapped with [normalizeAngle] before taking its absolute
 * value, so directions on either side of the ±π discontinuity compare as close.
 */
private fun angleDiff(a: Double, b: Double): Double {
    val wrapped = normalizeAngle(a - b)
    return abs(wrapped)
}
|
||||||
|
|
||||||
|
/**
 * An infinite 2D line in point-direction form.
 *
 * @property p a point lying on the line
 * @property d direction vector of the line
 */
private data class Line(val p: Point, val d: Point)
|
||||||
|
|
||||||
|
/**
 * Fits a line through [points].
 *
 * The line passes through the centroid of the points. Its direction is the
 * unit vector `(cos θ, sin θ)`, where `θ = ½·atan2(2·Sxy, Sxx − Syy)` is
 * derived from the second-order central moments of the point set.
 */
private fun fitLine(points: List<Point>): Line {
    val meanX = points.map { it.x }.average()
    val meanY = points.map { it.y }.average()

    // Second-order central moments of the point cloud.
    val sxx = points.sumOf { pt ->
        val dx = pt.x - meanX
        dx * dx
    }
    val sxy = points.sumOf { pt ->
        val dx = pt.x - meanX
        val dy = pt.y - meanY
        dx * dy
    }
    val syy = points.sumOf { pt ->
        val dy = pt.y - meanY
        dy * dy
    }

    val theta = 0.5 * atan2(2 * sxy, sxx - syy)
    return Line(Point(meanX, meanY), Point(cos(theta), sin(theta)))
}
|
||||||
|
|
||||||
|
/**
 * Computes the intersection point of two infinite lines.
 *
 * Each line is represented by two points: its anchor `p` and `p + d`.
 *
 * @return the intersection, or `null` when the determinant of the system is
 *   below `1e-6` in absolute value (lines parallel or nearly so)
 */
private fun intersectLines(l1: Line, l2: Line): Point? {
    // Two points on the first line.
    val p1x = l1.p.x
    val p1y = l1.p.y
    val p2x = p1x + l1.d.x
    val p2y = p1y + l1.d.y

    // Two points on the second line.
    val q1x = l2.p.x
    val q1y = l2.p.y
    val q2x = q1x + l2.d.x
    val q2y = q1y + l2.d.y

    val pDx = p1x - p2x
    val pDy = p1y - p2y
    val qDx = q1x - q2x
    val qDy = q1y - q2y

    val det = pDx * qDy - pDy * qDx
    if (abs(det) < 1e-6) return null

    val crossP = p1x * p2y - p1y * p2x
    val crossQ = q1x * q2y - q1y * q2x

    return Point(
        (crossP * qDx - pDx * crossQ) / det,
        (crossP * qDy - pDy * crossQ) / det
    )
}
|
||||||
|
|
||||||
|
/**
 * A run of consecutive contour points sharing a stable orientation.
 *
 * @property start index of the first contour point of the run
 * @property end index at which the run stops; it may be lower than or equal
 *   to [start] when the run wraps around the end of the contour
 * @property angle representative orientation of the run, in radians
 * @property length length associated with the run, in contour coordinate units
 */
private data class ContourSegment(
    val start: Int,
    val end: Int,
    val angle: Double,
    val length: Double,
)
|
||||||
|
|
||||||
|
/**
 * Splits the closed contour into runs of points whose smoothed orientation
 * stays stable.
 *
 * The walk starts at the index returned by [findBestStartIndex] and goes once
 * around the contour. A run is closed as soon as the orientation of the
 * current point deviates from the running orientation by [maxAngleVar] or
 * more, and unconditionally when the walk is back at its starting index.
 * Closed runs shorter than [minLength] are dropped.
 *
 * @param contour points of the closed contour
 * @param angles smoothed orientation (radians) for each contour point
 * @param maxAngleVar maximum allowed deviation (radians) within a run
 * @param minLength minimum path length for a run to be kept
 * @return the retained runs, in walking order
 */
private fun extractSegments(
    contour: List<Point>,
    angles: DoubleArray,
    maxAngleVar: Double,
    minLength: Double
): List<ContourSegment> {
    val count = contour.size
    val segments = mutableListOf<ContourSegment>()

    // Length of the contour path walked forward (with wrap-around) from one
    // index to another; zero when both indices are equal.
    fun pathLength(from: Int, to: Int): Double {
        var total = 0.0
        var cur = from
        while (cur != to) {
            val next = (cur + 1) % count
            total += hypot(
                contour[next].x - contour[cur].x,
                contour[next].y - contour[cur].y
            )
            cur = next
        }
        return total
    }

    val origin = findBestStartIndex(angles)
    var segmentStart = origin
    var runningAngle = angles[origin]

    for (step in 1..count) {
        val idx = (origin + step) % count
        val current = angles[idx]

        // The last step (back at the origin) always closes the current run.
        val sameRun = step < count && angleDiff(current, runningAngle) < maxAngleVar
        if (sameRun) {
            runningAngle = angleMean(runningAngle, current)
            continue
        }

        val length = pathLength(segmentStart, idx)
        if (length >= minLength) {
            segments += ContourSegment(segmentStart, idx, runningAngle, length)
        }
        segmentStart = idx
        runningAngle = current
    }

    return segments
}
|
||||||
|
|
||||||
|
/**
 * Finds the index that follows the largest orientation jump between two
 * cyclically consecutive entries of [angles].
 *
 * Returns 0 when no jump is strictly greater than zero (including when
 * [angles] is empty). On ties the first largest jump wins.
 */
private fun findBestStartIndex(angles: DoubleArray): Int {
    var winner = 0
    var largestJump = 0.0

    for (i in angles.indices) {
        // Cyclic successor of i.
        val next = if (i == angles.lastIndex) 0 else i + 1
        val jump = angleDiff(angles[i], angles[next])
        if (jump > largestJump) {
            largestJump = jump
            winner = next
        }
    }
    return winner
}
|
||||||
|
|
||||||
|
/**
 * Circular mean of two angles (radians): the direction of the sum of their
 * unit vectors.
 */
private fun angleMean(a: Double, b: Double): Double =
    atan2(sin(a) + sin(b), cos(a) + cos(b))
|
||||||
|
|
||||||
|
/**
 * Computes a smoothed orientation for every point of a closed contour.
 *
 * The raw orientation at index `i` is the direction from point `i - 1` to
 * point `i + 1` (indices taken cyclically). The smoothed orientation is the
 * circular mean of the raw orientations over indices `i - window..i + window`,
 * maintained with a sliding sum of cosines and sines so that trigonometric
 * functions are evaluated only once per point.
 *
 * @param contour points of the closed contour
 * @param window half-width of the smoothing window, in points; it may exceed
 *   the contour size, in which case indices simply wrap around several times
 * @return one angle in radians per contour point; empty when [contour] is empty
 */
private fun computeSmoothedAngles(
    contour: List<Point>,
    window: Int
): DoubleArray {
    val n = contour.size
    // Avoid a modulo by zero below.
    if (n == 0) return DoubleArray(0)

    // --- Step 1: raw angles ---
    val angles = DoubleArray(n)
    for (i in 0 until n) {
        val p0 = contour[(i - 1 + n) % n]
        val p1 = contour[(i + 1) % n]
        angles[i] = atan2(p1.y - p0.y, p1.x - p0.x)
    }

    // --- Step 2: precompute cos/sin ---
    val cosA = DoubleArray(n)
    val sinA = DoubleArray(n)
    for (i in 0 until n) {
        cosA[i] = cos(angles[i])
        sinA[i] = sin(angles[i])
    }

    // --- Step 3: sliding window smoothing ---
    val smooth = DoubleArray(n)
    var sx = 0.0
    var sy = 0.0

    // Initial window centered on index 0. Floored modulo keeps the index
    // non-negative even when the window is wider than the contour.
    for (k in -window..window) {
        val idx = k.mod(n)
        sx += cosA[idx]
        sy += sinA[idx]
    }
    smooth[0] = atan2(sy, sx)

    for (i in 1 until n) {
        val outIdx = (i - window - 1).mod(n)
        val inIdx = (i + window).mod(n)
        sx -= cosA[outIdx]
        sy -= sinA[outIdx]
        sx += cosA[inIdx]
        sy += sinA[inIdx]
        smooth[i] = atan2(sy, sx)
    }
    return smooth
}
|
||||||
|
|
||||||
|
/**
 * Merges consecutive segments whose orientations differ by less than
 * [angleThreshold].
 *
 * Lists of four segments or fewer are returned unchanged. Only neighbours in
 * list order are merged; the last segment is never merged with the first one.
 * A merged segment spans from the start of the first part to the end of the
 * last part, takes the circular mean of the two orientations, and sums the
 * lengths.
 */
private fun mergeSegments(
    segments: List<ContourSegment>,
    angleThreshold: Double
): List<ContourSegment> {
    when {
        segments.isEmpty() -> return emptyList()
        segments.size <= 4 -> return segments
    }

    val output = mutableListOf<ContourSegment>()
    val pending = segments.drop(1).fold(segments.first()) { acc, next ->
        if (angleDiff(next.angle, acc.angle) < angleThreshold) {
            ContourSegment(
                acc.start,
                next.end,
                angleMean(acc.angle, next.angle),
                acc.length + next.length
            )
        } else {
            output += acc
            next
        }
    }
    output += pending
    return output
}
|
||||||
|
|
||||||
|
/**
 * Greedily picks the longest segments whose orientations are pairwise
 * separated by at least [minAngleSeparation].
 *
 * Segments are examined from longest to shortest. A segment is kept unless
 * its orientation is closer than [minAngleSeparation] to one already kept,
 * and the search stops once [maxCount] segments have been kept.
 *
 * @return the kept segments, ordered by start index
 */
private fun selectDominantSegments(
    segments: List<ContourSegment>,
    maxCount: Int,
    minAngleSeparation: Double
): List<ContourSegment> {
    val kept = mutableListOf<ContourSegment>()

    for (candidate in segments.sortedByDescending { it.length }) {
        val distinct = kept.none { other ->
            angleDiff(candidate.angle, other.angle) < minAngleSeparation
        }
        if (!distinct) continue

        kept += candidate
        if (kept.size == maxCount) break
    }

    return kept.sortedBy { it.start }
}
|
||||||
@@ -0,0 +1,44 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2025-2026 Pierre-Yves Nicolas
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License as published by the Free
|
||||||
|
* Software Foundation, either version 3 of the License, or (at your option)
|
||||||
|
* any later version.
|
||||||
|
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||||
|
* more details.
|
||||||
|
* You should have received a copy of the GNU General Public License along with
|
||||||
|
* this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package org.fairscan.imageprocessing.quad
|
||||||
|
|
||||||
|
import org.opencv.core.Core
|
||||||
|
import org.opencv.core.CvType
|
||||||
|
import org.opencv.core.Mat
|
||||||
|
import org.opencv.core.MatOfPoint
|
||||||
|
import org.opencv.core.Point
|
||||||
|
import org.opencv.core.Scalar
|
||||||
|
import org.opencv.core.Size
|
||||||
|
import org.opencv.imgproc.Imgproc
|
||||||
|
|
||||||
|
// Fill polygon and return binary mask (0/1).
// The returned Mat is a newly allocated CV_8U matrix of the given size; the
// caller owns it and is responsible for releasing it.
fun makePolygonMask(size: Size, polygon: List<Point>): Mat {
    val mask = Mat.zeros(size, CvType.CV_8U)
    val pts = MatOfPoint(*polygon.toTypedArray())
    try {
        Imgproc.fillPoly(mask, listOf(pts), Scalar(1.0))
    } catch (e: Throwable) {
        // Do not leak the mask when drawing fails.
        mask.release()
        throw e
    } finally {
        // The point matrix is only needed for drawing: free its native memory
        // right away instead of waiting for garbage collection.
        pts.release()
    }
    return mask
}
|
||||||
|
|
||||||
|
// Compute score between quad and probmap.
// The score is the mean of probmap over the pixels covered by the quad,
// weighted by (0.7 + 0.3 * areaRatio), where areaRatio is the fraction of the
// probmap covered by the quad. It is 0.0 when the quad covers no pixel or when
// areaRatio is below minQuadAreaRatio.
// NOTE(review): probmap is multiplied element-wise with a CV_32F mask, so it is
// presumably a single-channel CV_32F matrix — confirm against callers.
fun scoreQuadAgainstProbmap(quad: List<Point>, probmap: Mat, minQuadAreaRatio: Double): Double {
    val mask = makePolygonMask(probmap.size(), quad)
    val maskFloat = Mat()
    val masked = Mat()
    try {
        mask.convertTo(maskFloat, CvType.CV_32F)
        Core.multiply(probmap, maskFloat, masked)

        // The mask holds 0/1 values, so its sum is the covered area in pixels.
        val maskArea = Core.sumElems(maskFloat).`val`[0]
        // An empty mask would make the mean below 0/0 = NaN.
        if (maskArea <= 0.0) return 0.0

        val meanProb = Core.sumElems(masked).`val`[0] / maskArea
        val areaRatio = maskArea / (probmap.rows() * probmap.cols())
        return if (areaRatio < minQuadAreaRatio) 0.0 else meanProb * (0.7 + 0.3 * areaRatio)
    } finally {
        // Free the native memory of the temporaries explicitly.
        mask.release()
        maskFloat.release()
        masked.release()
    }
}
|
||||||
@@ -1,133 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright 2025-2026 Pierre-Yves Nicolas
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify it
|
|
||||||
* under the terms of the GNU General Public License as published by the Free
|
|
||||||
* Software Foundation, either version 3 of the License, or (at your option)
|
|
||||||
* any later version.
|
|
||||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
|
||||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
|
||||||
* more details.
|
|
||||||
* You should have received a copy of the GNU General Public License along with
|
|
||||||
* this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
package org.fairscan.imageprocessing.quad
|
|
||||||
|
|
||||||
import org.fairscan.imageprocessing.Point
|
|
||||||
import kotlin.math.abs
|
|
||||||
import kotlin.math.acos
|
|
||||||
import kotlin.math.sqrt
|
|
||||||
import kotlin.math.sign
|
|
||||||
|
|
||||||
// Look for 3 consecutive angles that are (almost) right angles.
// For each vertex index whose angle, and the angles of the next two vertices,
// all lie in [angleMin, angleMax] degrees, a candidate quad is built from the
// three vertices that follow the preceding one, plus the intersection of the
// two outer edges. Candidates that leave the image or are not convex are
// rejected; the one with the lowest quadAngleError wins.
// Returns null when there are fewer than 4 points or no candidate qualifies.
fun findQuadFromRightAngles(
    points: List<Point>,
    imgWidth: Int,
    imgHeight: Int,
    angleMin: Float = 60f,
    angleMax: Float = 120f
): List<Point>? {
    val count = points.size
    if (count < 4) return null

    // Cyclic access, valid for offsets from -count upwards.
    fun at(i: Int): Point = points[(i + count) % count]

    val vertexAngles = List(count) { i -> orientedAngle(at(i - 1), at(i), at(i + 1)) }
    val accepted = angleMin..angleMax

    var best: List<Point>? = null
    var lowestError = Double.POSITIVE_INFINITY

    for (i in 0 until count) {
        val allNearRight = (0..2).all { k -> vertexAngles[(i + k) % count] in accepted }
        if (!allNearRight) continue

        val before = at(i - 1)
        val first = at(i)
        val second = at(i + 1)
        val third = at(i + 2)
        val after = at(i + 3)

        // The missing corner is where the two outer edges meet.
        val fourth = lineIntersection2(before, first, third, after) ?: continue
        val candidate = listOf(first, second, third, fourth)

        // ensure inside image bounds
        val outOfFrame = candidate.any {
            it.x < 0 || it.x >= imgWidth || it.y < 0 || it.y >= imgHeight
        }
        if (outOfFrame) continue

        // ensure convex
        if (!isConvex(candidate)) continue

        val error = quadAngleError(candidate)
        if (error < lowestError) {
            lowestError = error
            best = candidate
        }
    }
    return best
}
|
|
||||||
|
|
||||||
// Unsigned angle between two vectors, in degrees.
// A small epsilon is added to each norm so that a zero-length vector does not
// cause a division by zero.
fun angleBetween(v1: Point, v2: Point): Float {
    fun norm(v: Point) = sqrt(v.x * v.x + v.y * v.y) + 1e-9f

    val scalarProduct = v1.x * v2.x + v1.y * v2.y
    val cosine = (scalarProduct / (norm(v1) * norm(v2))).coerceIn(-1.0, 1.0)
    return Math.toDegrees(acos(cosine)).toFloat()
}
|
|
||||||
|
|
||||||
// Oriented angle at vertex b between the vectors b->a and b->c, in degrees.
// The unsigned angle is returned when the cross product of the two vectors is
// non-negative, and 360 minus that angle otherwise.
fun orientedAngle(a: Point, b: Point, c: Point): Double {
    val ux = a.x - b.x
    val uy = a.y - b.y
    val wx = c.x - b.x
    val wy = c.y - b.y

    // Epsilon guards against a division by zero for coincident points.
    val lenU = sqrt(ux * ux + uy * uy) + 1e-9f
    val lenW = sqrt(wx * wx + wy * wy) + 1e-9f

    val cosine = ((ux * wx + uy * wy) / (lenU * lenW)).coerceIn(-1.0, 1.0)
    val unsigned = Math.toDegrees(acos(cosine))
    val cross = ux * wy - uy * wx

    return if (cross < 0) 360.0 - unsigned else unsigned
}
|
|
||||||
|
|
||||||
// Intersection of the infinite line through p1 and p2 with the infinite line
// through p3 and p4. Returns null when the determinant is below 1e-6 in
// absolute value (lines parallel or nearly so).
fun lineIntersection2(p1: Point, p2: Point, p3: Point, p4: Point): Point? {
    val d12x = p1.x - p2.x
    val d12y = p1.y - p2.y
    val d34x = p3.x - p4.x
    val d34y = p3.y - p4.y

    val det = d12x * d34y - d12y * d34x
    if (abs(det) < 1e-6f) return null

    val cross12 = p1.x * p2.y - p1.y * p2.x
    val cross34 = p3.x * p4.y - p3.y * p4.x

    return Point(
        (cross12 * d34x - d12x * cross34) / det,
        (cross12 * d34y - d12y * cross34) / det
    )
}
|
|
||||||
|
|
||||||
// Sum, over the four corners of the quad, of the absolute deviation (in
// degrees) of the corner angle from 90 degrees. Lower means more rectangular.
fun quadAngleError(quad: List<Point>): Double =
    (0 until 4).sumOf { i ->
        val prev = quad[(i + 3) % 4]
        val vertex = quad[i]
        val next = quad[(i + 1) % 4]
        val degrees = angleBetween(
            Point(prev.x - vertex.x, prev.y - vertex.y),
            Point(next.x - vertex.x, next.y - vertex.y)
        )
        abs(degrees - 90.0)
    }
|
|
||||||
|
|
||||||
// A quad is convex when all its non-degenerate corners turn in the same
// direction. Returns false when the list does not hold exactly 4 points.
// Corners with a zero cross product (collinear points) are ignored.
fun isConvex(quad: List<Point>): Boolean {
    if (quad.size != 4) return false

    var orientation = 0
    for (i in 0 until 4) {
        val p = quad[i]
        val q = quad[(i + 1) % 4]
        val r = quad[(i + 2) % 4]

        val turn = ((q.x - p.x) * (r.y - q.y) - (q.y - p.y) * (r.x - q.x)).sign.toInt()
        if (turn == 0) continue

        if (orientation == 0) {
            orientation = turn
        } else if (turn != orientation) {
            return false
        }
    }
    return true
}
|
|
||||||
|
|
||||||
Reference in New Issue
Block a user