Extract module imageprocessing (#76)
This commit is contained in:
1
imageprocessing/.gitignore
vendored
Normal file
1
imageprocessing/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
/build
|
||||
19
imageprocessing/build.gradle.kts
Normal file
19
imageprocessing/build.gradle.kts
Normal file
@@ -0,0 +1,19 @@
|
||||
// Build script of the `imageprocessing` module: a plain JVM library written in Kotlin.
plugins {
    `java-library`
    alias(libs.plugins.jetbrains.kotlin.jvm)
}

// Java sources and bytecode both target Java 11.
java {
    sourceCompatibility = JavaVersion.VERSION_11
    targetCompatibility = JavaVersion.VERSION_11
}

// Keep the Kotlin bytecode target aligned with the Java one above.
kotlin {
    compilerOptions {
        jvmTarget = org.jetbrains.kotlin.gradle.dsl.JvmTarget.JVM_11
    }
}

dependencies {
    // OpenCV Java bindings used by the image-processing code.
    implementation(libs.opencvjava)

    testImplementation(kotlin("test"))
    testImplementation(libs.assertj)
}
|
||||
@@ -0,0 +1,183 @@
|
||||
/*
|
||||
* Copyright 2025 Pierre-Yves Nicolas
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
* You should have received a copy of the GNU General Public License along with
|
||||
* this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.fairscan.imageprocessing
|
||||
|
||||
import org.fairscan.imageprocessing.quad.detectDocumentQuadFromProbmap
|
||||
import org.fairscan.imageprocessing.quad.findQuadFromRightAngles
|
||||
import org.fairscan.imageprocessing.quad.minAreaRect
|
||||
import org.opencv.core.Core
|
||||
import org.opencv.core.CvType
|
||||
import org.opencv.core.Mat
|
||||
import org.opencv.core.MatOfPoint
|
||||
import org.opencv.core.MatOfPoint2f
|
||||
import org.opencv.core.Size
|
||||
import org.opencv.imgproc.Imgproc
|
||||
import kotlin.math.abs
|
||||
import kotlin.math.max
|
||||
|
||||
/**
 * A segmentation mask that can be converted to an OpenCV matrix.
 *
 * NOTE(review): [detectDocumentQuad] scales the matrix by 255 when converting it to 8 bits, so
 * values are presumably probabilities in [0, 1] — confirm against the implementations.
 */
interface Mask {
    /** Mask width; multiplied with [height] to get the total area contours are compared against. */
    val width: Int

    /** Mask height; see [width]. */
    val height: Int

    /** Returns the mask content as an OpenCV [Mat]. */
    fun toMat(): Mat
}
|
||||
|
||||
/**
 * Looks for the quadrilateral outlining the document in a segmentation [mask].
 *
 * Strategies are tried in order until one yields a result:
 * 1. the biggest contour of the mask, when it is already a large-enough quadrilateral;
 * 2. a search over several binarisation thresholds ([detectDocumentQuadFromProbmap]);
 * 3. a quadrilateral rebuilt from consecutive near-right angles of the biggest contour
 *    ([findQuadFromRightAngles]);
 * 4. outside live analysis only, the minimum-area rectangle around that contour ([minAreaRect]).
 *
 * @param mask segmentation mask to analyse
 * @param isLiveAnalysis when true, fewer thresholds are tried and strategy 4 is skipped
 * @param minQuadAreaRatio minimum quad area, as a fraction of the mask area, for strategies 1 and 2
 * @return the detected quad, or null when no strategy produced exactly four vertices
 */
fun detectDocumentQuad(mask: Mask, isLiveAnalysis: Boolean, minQuadAreaRatio: Double = 0.02): Quad? {
    val mat = mask.toMat()
    val (biggest, area) = biggestContour(mat)

    if (biggest != null && biggest.total() == 4L && area > mask.width * mask.height * minQuadAreaRatio) {
        return createQuad(biggest.toList().map { Point(it.x, it.y) })
    }

    // Fallback 1: adjust threshold.
    // Thresholds are fractions of full scale: detectDocumentQuadFromProbmap multiplies each one
    // by 255 before thresholding an 8-bit map, so values above 1.0 could never match anything.
    val thresholds =
        if (isLiveAnalysis) listOf(0.25, 0.5, 0.75) else (0..12).map { 0.2 + it * 0.05 }
    var vertices: List<Point>? =
        detectDocumentQuadFromProbmap(mat, thresholds, minQuadAreaRatio = minQuadAreaRatio)
            ?.map { Point(it.x, it.y) }

    if (vertices == null && biggest != null && biggest.total() > 4) {
        val polygon = biggest.toList().map { Point(it.x, it.y) }

        // Fallback 2: look for right angles
        vertices = findQuadFromRightAngles(polygon, mask.width, mask.height)

        // Fallback 3: bounding rectangle (skipped during live analysis)
        if (vertices == null && !isLiveAnalysis) {
            vertices = minAreaRect(polygon, mask.width, mask.height)
        }
    }

    return if (vertices?.size == 4) createQuad(vertices) else null
}
|
||||
|
||||
/**
 * Finds the contour with the largest area in [mat], after polygonal simplification.
 *
 * The input is scaled by 255 into an 8-bit matrix, cleaned with [refineMask], blurred and passed
 * through Canny edge detection; each contour found is simplified with `approxPolyDP` using a
 * tolerance of 2% of its perimeter.
 *
 * @return the simplified contour with the largest area together with that area, or
 * `null` and `0.0` when no contour has a strictly positive area. The returned matrix is not
 * released here; [mat] itself is never released either.
 */
private fun biggestContour(mat: Mat): Pair<MatOfPoint2f?, Double> {
    val mat8u = Mat()
    mat.convertTo(mat8u, CvType.CV_8UC1, 255.0)

    val refinedMask = refineMask(mat8u)

    val blurred = Mat()
    Imgproc.GaussianBlur(refinedMask, blurred, Size(5.0, 5.0), 0.0)

    val edges = Mat()
    Imgproc.Canny(blurred, edges, 75.0, 200.0)

    val contours = mutableListOf<MatOfPoint>()
    val hierarchy = Mat()
    Imgproc.findContours(edges, contours, hierarchy, Imgproc.RETR_LIST, Imgproc.CHAIN_APPROX_SIMPLE)

    var biggest: MatOfPoint2f? = null
    var maxArea = 0.0

    for (contour in contours) {
        val contour2f = MatOfPoint2f(*contour.toArray())
        val peri = Imgproc.arcLength(contour2f, true)
        val approx = MatOfPoint2f()
        Imgproc.approxPolyDP(contour2f, approx, 0.02 * peri, true)

        val area = abs(Imgproc.contourArea(approx))
        if (area > maxArea) {
            // The previous candidate is superseded: free its native memory.
            biggest?.release()
            maxArea = area
            biggest = approx
        } else {
            approx.release()
        }
        contour2f.release()
        contour.release()
    }

    // Free the native buffers of all intermediates instead of waiting for finalizers.
    mat8u.release()
    refinedMask.release()
    blurred.release()
    edges.release()
    hierarchy.release()

    return Pair(biggest, maxArea)
}
|
||||
|
||||
/**
 * Applies morphological operations to improve a document mask.
 *
 * The mask is binarised (values above 128 become 255), then closed and opened with the same
 * 5x5 elliptical kernel.
 *
 * @param original mask to refine; it is left untouched
 * @return a new matrix holding the refined mask
 */
fun refineMask(original: Mat): Mat {
    // Step 0: Ensure the mask is binary (just in case)
    val binaryMask = Mat()
    Imgproc.threshold(original, binaryMask, 128.0, 255.0, Imgproc.THRESH_BINARY)

    // Closing and opening use an identical structuring element, so it is built once.
    val kernel = Imgproc.getStructuringElement(Imgproc.MORPH_ELLIPSE, Size(5.0, 5.0))

    // Step 1: Closing (fills small holes)
    val closed = Mat()
    Imgproc.morphologyEx(binaryMask, closed, Imgproc.MORPH_CLOSE, kernel)

    // Step 2: Gentle opening (removes isolated noise)
    val opened = Mat()
    Imgproc.morphologyEx(closed, opened, Imgproc.MORPH_OPEN, kernel)

    // Free the native buffers of the intermediates.
    binaryMask.release()
    closed.release()
    kernel.release()

    return opened
}
|
||||
|
||||
/**
 * Extracts the document delimited by [quad] from [inputMat] as an upright, enhanced image.
 *
 * The quad is warped to a rectangle whose sides are the averages of the opposite quad edges,
 * then passed through [resize] (longest side capped at 1500), [enhanceCapturedImage] and
 * [rotate].
 *
 * @param inputMat source image; it is not modified nor released
 * @param quad document corners, in [inputMat] coordinates
 * @param rotationDegrees rotation applied last; see [rotate] for the supported values
 * @return a new matrix holding the extracted document
 * @throws IllegalArgumentException when [rotationDegrees] is not supported by [rotate]
 */
fun extractDocument(inputMat: Mat, quad: Quad, rotationDegrees: Int): Mat {
    val widthTop = norm(quad.topLeft, quad.topRight)
    val widthBottom = norm(quad.bottomLeft, quad.bottomRight)
    val targetWidth = (widthTop + widthBottom) / 2

    val heightLeft = norm(quad.topLeft, quad.bottomLeft)
    val heightRight = norm(quad.topRight, quad.bottomRight)
    val targetHeight = (heightLeft + heightRight) / 2

    val srcPoints = MatOfPoint2f(
        quad.topLeft.toCv(),
        quad.topRight.toCv(),
        quad.bottomRight.toCv(),
        quad.bottomLeft.toCv(),
    )
    val dstPoints = MatOfPoint2f(
        org.opencv.core.Point(0.0, 0.0),
        org.opencv.core.Point(targetWidth, 0.0),
        org.opencv.core.Point(targetWidth, targetHeight),
        org.opencv.core.Point(0.0, targetHeight),
    )
    val transform = Imgproc.getPerspectiveTransform(srcPoints, dstPoints)

    val warped = Mat()
    Imgproc.warpPerspective(inputMat, warped, transform, Size(targetWidth, targetHeight))
    srcPoints.release()
    dstPoints.release()
    transform.release()

    val resized = resize(warped, 1500.0)
    // resize() hands back its argument when no downscaling is needed: only release a distinct Mat.
    if (resized !== warped) warped.release()

    val enhanced = enhanceCapturedImage(resized)
    resized.release()

    val rotated = rotate(enhanced, rotationDegrees)
    enhanced.release()

    return rotated
}
|
||||
|
||||
/**
 * Scales [original] so that its longest side measures [targetMax], keeping the aspect ratio.
 *
 * @return [original] itself (not a copy) when its longest side is already shorter than
 * [targetMax]; otherwise a new matrix resized with `INTER_AREA` interpolation
 */
fun resize(original: Mat, targetMax: Double): Mat {
    val size = original.size()
    if (max(size.width, size.height) < targetMax) return original

    val isPortrait = size.width < size.height
    val targetSize = if (isPortrait) {
        Size(size.width * targetMax / size.height, targetMax)
    } else {
        Size(targetMax, size.height * targetMax / size.width)
    }

    val result = Mat()
    Imgproc.resize(original, result, targetSize, 0.0, 0.0, Imgproc.INTER_AREA)
    return result
}
|
||||
|
||||
/**
 * Returns a copy of [input] rotated clockwise by [degrees].
 *
 * @param degrees any multiple of 90; negative values and values beyond 360 are normalised first
 * @throws IllegalArgumentException when [degrees] is not a multiple of 90
 */
fun rotate(input: Mat, degrees: Int): Mat {
    val rotateCode: Int? = when (degrees.mod(360)) {
        0 -> null
        90 -> Core.ROTATE_90_CLOCKWISE
        180 -> Core.ROTATE_180
        270 -> Core.ROTATE_90_COUNTERCLOCKWISE
        else -> throw IllegalArgumentException("Only 0, 90, 180, 270 degrees are supported")
    }

    val output = Mat()
    if (rotateCode == null) {
        input.copyTo(output)
    } else {
        Core.rotate(input, output, rotateCode)
    }
    return output
}
|
||||
|
||||
/** Converts this point to its OpenCV counterpart, keeping the same coordinates. */
fun Point.toCv(): org.opencv.core.Point = org.opencv.core.Point(x, y)
|
||||
|
||||
@@ -0,0 +1,97 @@
|
||||
/*
|
||||
* Copyright 2025 Pierre-Yves Nicolas
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
* You should have received a copy of the GNU General Public License along with
|
||||
* this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.fairscan.imageprocessing
|
||||
|
||||
import kotlin.math.atan2
|
||||
import kotlin.math.hypot
|
||||
|
||||
/** A point of the plane with double-precision coordinates. */
data class Point(val x: Double, val y: Double) {
    /** Convenience constructor widening integer coordinates to [Double]. */
    constructor(x: Int, y: Int) : this(x = x.toDouble(), y = y.toDouble())
}
|
||||
|
||||
/** A segment going from [from] to [to]. */
data class Line(val from: Point, val to: Point) {
    /** Length of the segment, i.e. the distance between its two end points. */
    fun norm(): Double = norm(from, to)
}
|
||||
|
||||
/** Euclidean distance between [p1] and [p2]. */
fun norm(p1: Point, p2: Point): Double = hypot(p2.x - p1.x, p2.y - p1.y)
|
||||
|
||||
/** A quadrilateral described by its four corners. */
data class Quad(
    val topLeft: Point,
    val topRight: Point,
    val bottomRight: Point,
    val bottomLeft: Point
) {
    /** The four edges, starting with the top one and following the corner order of the constructor. */
    fun edges(): List<Line> = listOf(
        Line(topLeft, topRight),
        Line(topRight, bottomRight),
        Line(bottomRight, bottomLeft),
        Line(bottomLeft, topLeft),
    )

    /**
     * Rotates the quad by [iterations] quarter turns inside an image of
     * [imageWidth] x [imageHeight], then re-derives the corner roles with [createQuad].
     *
     * @param iterations number of quarter turns; any integer is accepted and taken modulo 4,
     * so `-1` is equivalent to `3`
     * @param imageWidth width of the image before rotation
     * @param imageHeight height of the image before rotation
     */
    fun rotate90(iterations: Int, imageWidth: Int, imageHeight: Int): Quad {
        val rotatedPoints = listOf(topLeft, topRight, bottomRight, bottomLeft)
            .map { rotate90(it, imageWidth, imageHeight, iterations) }
        return createQuad(rotatedPoints)
    }

    /** Rotates a single point; [width] and [height] are the image dimensions before rotation. */
    private fun rotate90(p: Point, width: Int, height: Int, iterations: Int): Point {
        // mod() always yields 0..3, unlike %, whose result is negative for negative inputs
        // (which previously made negative iteration counts fall through to "no rotation").
        return when (iterations.mod(4)) {
            1 -> Point(height - p.y, p.x) // 90°
            2 -> Point(width - p.x, height - p.y) // 180°
            3 -> Point(p.y, width - p.x) // 270°
            else -> p // 0°
        }
    }
}
|
||||
|
||||
/**
 * Builds a [Quad] from four unordered [vertices].
 *
 * The vertices are ordered by increasing angle around their centroid (as given by `atan2`) and
 * assigned, in that order, to top-left, top-right, bottom-right and bottom-left.
 *
 * @throws IllegalArgumentException when [vertices] does not contain exactly four points
 */
fun createQuad(vertices: List<Point>): Quad {
    require(vertices.size == 4)

    val centroidX = vertices.map { it.x }.average()
    val centroidY = vertices.map { it.y }.average()

    val (first, second, third, fourth) = vertices.sortedBy { vertex ->
        atan2(vertex.y - centroidY, vertex.x - centroidX)
    }
    return Quad(first, second, third, fourth)
}
|
||||
|
||||
/**
 * Maps this quad from a [fromWidth] x [fromHeight] coordinate space to a
 * [toWidth] x [toHeight] one, scaling each axis independently.
 */
fun Quad.scaledTo(fromWidth: Int, fromHeight: Int, toWidth: Int, toHeight: Int): Quad {
    val horizontalScale = toWidth.toFloat() / fromWidth
    val verticalScale = toHeight.toFloat() / fromHeight

    fun rescale(corner: Point): Point = corner.scaled(horizontalScale, verticalScale)

    return Quad(
        topLeft = rescale(topLeft),
        topRight = rescale(topRight),
        bottomRight = rescale(bottomRight),
        bottomLeft = rescale(bottomLeft),
    )
}
|
||||
|
||||
/** Returns a copy of this point with x multiplied by [scaleX] and y multiplied by [scaleY]. */
fun Point.scaled(scaleX: Float, scaleY: Float): Point = Point(x * scaleX, y * scaleY)
|
||||
@@ -0,0 +1,202 @@
|
||||
/*
|
||||
* Copyright 2025 Pierre-Yves Nicolas
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
* You should have received a copy of the GNU General Public License along with
|
||||
* this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.fairscan.imageprocessing
|
||||
|
||||
import org.opencv.core.Core
|
||||
import org.opencv.core.CvType
|
||||
import org.opencv.core.Mat
|
||||
import org.opencv.core.Scalar
|
||||
import org.opencv.core.Size
|
||||
import org.opencv.imgproc.Imgproc
|
||||
import kotlin.math.max
|
||||
|
||||
/**
 * Enhances a captured document image.
 *
 * Images detected as coloured by [isColoredDocument] only get a linear brightness/contrast
 * boost. Other images are converted to grayscale with a multi-scale retinex, contrast-stretched
 * and converted back to a 3-channel image.
 *
 * @param img image to enhance; it is neither modified nor released
 * @return a new matrix holding the enhanced image
 */
fun enhanceCapturedImage(img: Mat): Mat {
    val result = Mat()
    if (isColoredDocument(img)) {
        Core.convertScaleAbs(img, result, 1.2, 10.0)
    } else {
        val gray = multiScaleRetinex(img)
        val contrastedGray = enhanceContrastAuto(gray)
        Imgproc.cvtColor(contrastedGray, result, Imgproc.COLOR_GRAY2BGR)
        // Both intermediates are fresh matrices owned by this function: free them.
        gray.release()
        contrastedGray.release()
    }
    return result
}
|
||||
|
||||
/**
 * Tells whether [img] contains a significant share of coloured pixels.
 *
 * The image is converted to Lab; the chroma of a pixel is the Euclidean norm of its a and b
 * channels after subtracting 128 from each.
 * NOTE(review): the BGR2Lab conversion and the 128 offset presumably assume an 8-bit BGR
 * input — confirm with callers.
 *
 * @param chromaThreshold chroma above which a pixel counts as coloured
 * @param proportionThreshold share of coloured pixels above which the document counts as coloured
 */
fun isColoredDocument(
    img: Mat,
    chromaThreshold: Double = 20.0,
    proportionThreshold: Double = 0.001
): Boolean {
    // Every temporary matrix is registered here so that all of them get released at the end.
    val scratch = mutableListOf<Mat>()
    fun newMat(): Mat = Mat().also { scratch.add(it) }

    val lab = newMat()
    Imgproc.cvtColor(img, lab, Imgproc.COLOR_BGR2Lab)

    val labChannels = ArrayList<Mat>()
    Core.split(lab, labChannels)

    // (channel - 128)^2, computed in 32-bit floats.
    fun squaredDeviation(channel: Mat): Mat {
        val asFloat = newMat()
        channel.convertTo(asFloat, CvType.CV_32F)
        val centered = newMat()
        Core.subtract(asFloat, Scalar(128.0), centered)
        val squared = newMat()
        Core.multiply(centered, centered, squared)
        return squared
    }

    val sumSq = newMat()
    Core.add(squaredDeviation(labChannels[1]), squaredDeviation(labChannels[2]), sumSq)

    val chroma = newMat()
    Core.sqrt(sumSq, chroma)

    // Pixels whose chroma exceeds the threshold become 1, the others 0.
    val coloredMask = newMat()
    Imgproc.threshold(chroma, coloredMask, chromaThreshold, 1.0, Imgproc.THRESH_BINARY)
    val coloredPixels = Core.countNonZero(coloredMask)

    val totalPixels = chroma.rows() * chroma.cols()
    val proportion = coloredPixels.toDouble() / totalPixels.toDouble()

    labChannels.forEach { it.release() }
    scratch.forEach { it.release() }

    return proportion > proportionThreshold
}
|
||||
|
||||
|
||||
/**
 * Computes a grayscale multi-scale retinex of [img].
 *
 * For two box-filter sizes (1/50 and 1/3 of the longest image side), the difference between the
 * log of the image and the log of its blurred version is accumulated with equal weights. The
 * result is then stretched to the 0..255 range.
 *
 * @return a new 8-bit matrix; [img] is left untouched
 */
private fun multiScaleRetinex(img: Mat): Mat {
    val longestSide = img.size().let { max(it.width, it.height) }
    val kernelSizes: List<Double> = listOf(longestSide / 50, longestSide / 3)

    // Convert to grayscale (1 channel)
    val gray = Mat()
    when (img.channels()) {
        4 -> Imgproc.cvtColor(img, gray, Imgproc.COLOR_BGRA2GRAY)
        3 -> Imgproc.cvtColor(img, gray, Imgproc.COLOR_BGR2GRAY)
        else -> img.copyTo(gray)
    }

    // Shift by one so that the logarithm is defined for zero-valued pixels.
    val shifted = Mat()
    gray.convertTo(shifted, CvType.CV_32F)
    Core.add(shifted, Scalar(1.0), shifted)

    val logImg = Mat()
    Core.log(shifted, logImg)

    val weight = 1.0 / kernelSizes.size
    val retinex = Mat.zeros(gray.size(), CvType.CV_32F)

    // Work buffers reused for every scale.
    val blur = Mat()
    val logBlur = Mat()
    val diff = Mat()

    kernelSizes.forEach { kernelSize ->
        Imgproc.boxFilter(shifted, blur, -1, Size(kernelSize, kernelSize))
        Core.add(blur, Scalar(1.0), blur)
        Core.log(blur, logBlur)
        Core.subtract(logImg, logBlur, diff)

        val diffGray = Mat()
        if (diff.channels() > 1) {
            Imgproc.cvtColor(diff, diffGray, Imgproc.COLOR_BGRA2GRAY)
        } else {
            diff.copyTo(diffGray)
        }
        Core.addWeighted(retinex, 1.0, diffGray, weight, 0.0, retinex)
        diffGray.release()
    }

    // Normalize: map [min, max] onto [0, 255]; a flat image keeps a scale of 1.
    val minMax = Core.minMaxLoc(retinex)
    val normalized = Mat()
    Core.subtract(retinex, Scalar(minMax.minVal), normalized)
    val scale = if (minMax.maxVal > minMax.minVal) 255.0 / (minMax.maxVal - minMax.minVal) else 1.0
    Core.multiply(normalized, Scalar(scale), normalized)

    val result = Mat()
    normalized.convertTo(result, CvType.CV_8U)

    // Cleanup
    listOf(gray, shifted, retinex, logImg, blur, logBlur, diff, normalized).forEach { it.release() }

    return result
}
|
||||
|
||||
/**
 * Stretches the contrast of [img] between two percentiles of its grayscale values.
 *
 * The values at the 0.5th and 80th percentiles are mapped to 0 and 255 respectively (values
 * outside are clamped), then a final linear adjustment (x1.15, -25) is applied.
 *
 * @param img image to enhance; a multi-channel image is first converted to grayscale
 * @return a new 8-bit single-channel matrix; [img] is left untouched
 */
private fun enhanceContrastAuto(img: Mat): Mat {
    val needsConversion = img.channels() != 1
    val gray = if (needsConversion) {
        Mat().also { Imgproc.cvtColor(img, it, Imgproc.COLOR_BGR2GRAY) }
    } else {
        img
    }

    // Flatten and sort pixel values
    val flat = Mat()
    gray.reshape(1, 1).convertTo(flat, CvType.CV_32F)
    val sortedVals = Mat()
    Core.sort(flat, sortedVals, Core.SORT_ASCENDING)

    val totalPixels = sortedVals.cols()
    val pLow = sortedVals.get(0, (totalPixels * 0.005).toInt())[0]
    val pHigh = sortedVals.get(0, (totalPixels * 0.80).toInt())[0]

    flat.release()
    sortedVals.release()

    // Stretch the same grayscale data the percentiles were measured on (not the raw input).
    val imgF = Mat()
    gray.convertTo(imgF, CvType.CV_32F)
    // Only release the grayscale matrix when it was created here; otherwise it is the caller's.
    if (needsConversion) gray.release()

    val adjusted = Mat()
    Core.subtract(imgF, Scalar(pLow), adjusted)
    // max(..., 1.0) avoids a division by zero when both percentiles are equal.
    Core.multiply(adjusted, Scalar(255.0 / max((pHigh - pLow), 1.0)), adjusted)
    Core.min(adjusted, Scalar(255.0), adjusted)
    Core.max(adjusted, Scalar(0.0), adjusted)

    val result = Mat()
    adjusted.convertTo(result, CvType.CV_8U)
    imgF.release()
    adjusted.release()

    val final = Mat()
    Core.convertScaleAbs(result, final, 1.15, -25.0)
    result.release()

    return final
}
|
||||
@@ -0,0 +1,129 @@
|
||||
/*
|
||||
* Copyright 2025 Pierre-Yves Nicolas
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
* You should have received a copy of the GNU General Public License along with
|
||||
* this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.fairscan.imageprocessing.quad
|
||||
|
||||
import org.opencv.core.Mat
|
||||
import org.opencv.core.CvType
|
||||
import org.opencv.core.Size
|
||||
import org.opencv.core.Point
|
||||
import org.opencv.core.Scalar
|
||||
import org.opencv.core.MatOfPoint
|
||||
import org.opencv.core.MatOfPoint2f
|
||||
import org.opencv.core.Core
|
||||
import org.opencv.imgproc.Imgproc
|
||||
import kotlin.math.abs
|
||||
|
||||
/**
 * Looks for a binarisation threshold for which a quadrilateral is found in [probmap], and
 * returns the best-scoring quadrilateral.
 *
 * The map is scaled by 255 into an 8-bit matrix and lightly blurred. Candidates come from an
 * optional Otsu threshold and from each value of [thresholds]; they are ranked with
 * [scoreQuadAgainstProbmap].
 *
 * @param probmap map to analyse; it is neither modified nor released
 * @param thresholds thresholds to try, as fractions of full scale (each one is multiplied by 255)
 * @param useOtsu whether to also try the threshold chosen by Otsu's method
 * @param minQuadAreaRatio minimum quad area, as a fraction of the map area
 * @return the four vertices of the best quadrilateral, or null when none was found
 */
fun detectDocumentQuadFromProbmap(
    probmap: Mat,
    thresholds: List<Double>,
    useOtsu: Boolean = true,
    minQuadAreaRatio: Double = 0.02
): List<Point>? {
    val probmapU8 = Mat()
    probmap.convertTo(probmapU8, CvType.CV_8U, 255.0)
    val probmapSmooth = Mat()
    Imgproc.GaussianBlur(probmapU8, probmapSmooth, Size(3.0, 3.0), 0.0)

    // The float version used for scoring does not depend on the threshold: convert it once.
    val probFloat = Mat()
    probmap.convertTo(probFloat, CvType.CV_32F)

    var bestScore = 0.0
    var bestQuad: List<Point>? = null

    // Extracts a quad from a binary mask and keeps it when it beats the best score so far.
    fun consider(binary: Mat) {
        val quad = findQuadFromBinaryMask(binary, minQuadAreaRatio) ?: return
        val score = scoreQuadAgainstProbmap(quad, probFloat)
        if (score > bestScore) {
            bestScore = score
            bestQuad = quad
        }
    }

    // 1) Otsu
    if (useOtsu) {
        val otsu = Mat()
        Imgproc.threshold(probmapSmooth, otsu, 0.0, 255.0, Imgproc.THRESH_BINARY + Imgproc.THRESH_OTSU)
        consider(otsu)
        otsu.release()
    }

    // 2) Threshold sweep
    if (thresholds.isNotEmpty()) {
        val kernel = Imgproc.getStructuringElement(Imgproc.MORPH_ELLIPSE, Size(5.0, 5.0))
        val bin = Mat()
        for (thr in thresholds) {
            Imgproc.threshold(probmapSmooth, bin, thr * 255.0, 255.0, Imgproc.THRESH_BINARY)
            Imgproc.morphologyEx(bin, bin, Imgproc.MORPH_CLOSE, kernel)
            consider(bin)
        }
        bin.release()
        kernel.release()
    }

    probmapU8.release()
    probmapSmooth.release()
    probFloat.release()

    return bestQuad
}
|
||||
|
||||
/**
 * Rasterises [polygon] into a new 8-bit matrix of the given [size]: pixels covered by the
 * polygon are set to 1, all others to 0.
 */
fun makePolygonMask(size: Size, polygon: List<Point>): Mat =
    Mat.zeros(size, CvType.CV_8U).also { canvas ->
        val outline = MatOfPoint(*polygon.toTypedArray())
        Imgproc.fillPoly(canvas, listOf(outline), Scalar(1.0))
    }
|
||||
|
||||
/**
 * Scores how well [quad] matches [probmap].
 *
 * The score is the mean map value inside the quad, weighted by `0.7 + 0.3 * areaRatio` where
 * `areaRatio` is the share of the map covered by the quad; larger quads are therefore favoured.
 *
 * @param quad vertices of the candidate polygon
 * @param probmap map to score against. NOTE(review): it is multiplied element-wise with a
 * CV_32F mask, so it is presumably expected to be CV_32F itself — confirm for other callers.
 * @return the score, or 0.0 when the quad covers no pixel of the map
 */
fun scoreQuadAgainstProbmap(quad: List<Point>, probmap: Mat): Double {
    val mask = makePolygonMask(probmap.size(), quad)
    val maskFloat = Mat()
    mask.convertTo(maskFloat, CvType.CV_32F)
    val masked = Mat()
    Core.multiply(probmap, maskFloat, masked)

    // The mask only holds 0 and 1, so its sum is the number of covered pixels.
    val coveredPixels = Core.sumElems(maskFloat).`val`[0]
    val probabilitySum = Core.sumElems(masked).`val`[0]

    mask.release()
    maskFloat.release()
    masked.release()

    // Avoid a 0/0 division (NaN) for a quad that covers nothing.
    if (coveredPixels == 0.0) return 0.0

    val meanProb = probabilitySum / coveredPixels
    val areaRatio = coveredPixels / (probmap.rows() * probmap.cols())
    return meanProb * (0.7 + 0.3 * areaRatio)
}
|
||||
|
||||
/**
 * Finds the largest quadrilateral in a binary mask.
 *
 * The mask is blurred and passed through Canny edge detection; each contour is simplified with
 * `approxPolyDP` (tolerance: 2% of its perimeter) and only those reduced to exactly four
 * vertices are considered.
 *
 * @param binMask mask to analyse; it is neither modified nor released
 * @param minQuadAreaRatio minimum quad area, as a fraction of the mask area
 * @return the four vertices of the largest quadrilateral, or null when there is none or when
 * it is not strictly larger than the minimum area
 */
fun findQuadFromBinaryMask(binMask: Mat, minQuadAreaRatio: Double = 0.02): List<Point>? {
    val blurred = Mat()
    Imgproc.GaussianBlur(binMask, blurred, Size(5.0, 5.0), 0.0)
    val edges = Mat()
    Imgproc.Canny(blurred, edges, 75.0, 200.0)

    val contours = mutableListOf<MatOfPoint>()
    val hierarchy = Mat()
    Imgproc.findContours(edges, contours, hierarchy, Imgproc.RETR_LIST, Imgproc.CHAIN_APPROX_SIMPLE)

    var biggest: MatOfPoint2f? = null
    var maxArea = 0.0
    for (cnt in contours) {
        val cnt2f = MatOfPoint2f(*cnt.toArray())
        val peri = Imgproc.arcLength(cnt2f, true)
        val approx = MatOfPoint2f()
        Imgproc.approxPolyDP(cnt2f, approx, 0.02 * peri, true)

        // Polygons that are not quadrilaterals get a zero area so that they are never retained.
        val area = if (approx.rows() == 4) abs(Imgproc.contourArea(approx)) else 0.0
        if (area > maxArea) {
            biggest?.release()
            maxArea = area
            biggest = approx
        } else {
            approx.release()
        }
        cnt2f.release()
        cnt.release()
    }

    val totalArea = binMask.rows() * binMask.cols().toDouble()
    // Copy the vertices out before freeing the native matrix that holds them.
    val vertices = if (maxArea > totalArea * minQuadAreaRatio) biggest?.toList() else null

    biggest?.release()
    blurred.release()
    edges.release()
    hierarchy.release()

    return vertices
}
|
||||
@@ -0,0 +1,111 @@
|
||||
/*
|
||||
* Copyright 2025 Pierre-Yves Nicolas
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
* You should have received a copy of the GNU General Public License along with
|
||||
* this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.fairscan.imageprocessing.quad
|
||||
|
||||
import org.fairscan.imageprocessing.Point
|
||||
import kotlin.math.cos
|
||||
import kotlin.math.sin
|
||||
|
||||
/**
 * Looks for a minimal-area rectangle that covers [polygon].
 *
 * The convex hull of the polygon is rotated by every whole degree from 0 to 89; the
 * axis-aligned bounding box with the smallest area is kept and rotated back.
 *
 * @param polygon points to cover
 * @param imgWidth when given together with [imgHeight], vertices are clamped to `0..imgWidth - 1`
 * @param imgHeight when given together with [imgWidth], vertices are clamped to `0..imgHeight - 1`
 * @return the four vertices of the rectangle, or null when [polygon] has fewer than 3 points
 * or when its convex hull has fewer than 3 vertices. Note that clamping may turn the result
 * into a non-rectangular quadrilateral.
 */
fun minAreaRect(polygon: List<Point>, imgWidth: Int? = null, imgHeight: Int? = null): List<Point>? {
    if (polygon.size < 3) return null

    val hull = convexHull(polygon)
    // A hull with fewer than 3 vertices spans no area: there is no rectangle to return
    // (returning the 1 or 2 hull points would break the "four vertices" contract).
    if (hull.size < 3) return null

    var bestArea = Double.POSITIVE_INFINITY
    var bestRect: List<Point>? = null

    // Test 90 rotation angles between 0 and π/2
    for (deg in 0 until 90) {
        val angle = Math.toRadians(deg.toDouble())
        val cosA = cos(angle)
        val sinA = sin(angle)

        val rotated = hull.map { Point(it.x * cosA - it.y * sinA, it.x * sinA + it.y * cosA) }

        val minX = rotated.minOf { it.x }
        val maxX = rotated.maxOf { it.x }
        val minY = rotated.minOf { it.y }
        val maxY = rotated.maxOf { it.y }

        val area = (maxX - minX) * (maxY - minY)
        if (area < bestArea) {
            bestArea = area
            // Corners of the bounding box, mapped back with the inverse rotation.
            bestRect = listOf(
                Point(minX, minY),
                Point(maxX, minY),
                Point(maxX, maxY),
                Point(minX, maxY),
            ).map { Point(it.x * cosA + it.y * sinA, -it.x * sinA + it.y * cosA) }
        }
    }

    val rect = bestRect ?: return null

    // Optionally clip within image bounds
    if (imgWidth == null || imgHeight == null) return rect
    val w = imgWidth - 1.0
    val h = imgHeight - 1.0
    return rect.map { Point(it.x.coerceIn(0.0, w), it.y.coerceIn(0.0, h)) }
}
|
||||
|
||||
/**
 * Computes the convex hull of [points] by building a lower and an upper chain over the points
 * sorted by x then y.
 *
 * Duplicate points are ignored and collinear points are dropped from the chains. When at most
 * three distinct points remain they are returned as they are, without further processing.
 */
fun convexHull(points: List<Point>): List<Point> {
    val unique = points.distinctBy { it.x to it.y }
    if (unique.size <= 3) return unique

    val sorted = unique.sortedWith(compareBy<Point> { it.x }.thenBy { it.y })

    // z-component of the cross product (a - o) x (b - o).
    fun cross(o: Point, a: Point, b: Point): Double =
        (a.x - o.x) * (b.y - o.y) - (a.y - o.y) * (b.x - o.x)

    // Builds one chain of the hull; its last point is dropped because it starts the other chain.
    fun halfHull(ordered: List<Point>): List<Point> {
        val chain = ArrayList<Point>()
        for (candidate in ordered) {
            while (chain.size >= 2 && cross(chain[chain.size - 2], chain[chain.size - 1], candidate) <= 0.0) {
                chain.removeAt(chain.size - 1)
            }
            chain.add(candidate)
        }
        return chain.dropLast(1)
    }

    return halfHull(sorted) + halfHull(sorted.asReversed())
}
|
||||
@@ -0,0 +1,133 @@
|
||||
/*
|
||||
* Copyright 2025 Pierre-Yves Nicolas
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
* You should have received a copy of the GNU General Public License along with
|
||||
* this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.fairscan.imageprocessing.quad
|
||||
|
||||
import org.fairscan.imageprocessing.Point
|
||||
import kotlin.math.abs
|
||||
import kotlin.math.acos
|
||||
import kotlin.math.sqrt
|
||||
import kotlin.math.sign
|
||||
|
||||
/**
 * Looks for 3 consecutive angles of the polygon [points] that are (almost) right angles and
 * completes them into a quadrilateral.
 *
 * For three consecutive vertices whose oriented angles (see [orientedAngle]) all lie within
 * [angleMin]..[angleMax], the fourth vertex is the intersection of the line entering the first
 * vertex with the line leaving the third one. Candidates lying outside the image or not convex
 * are discarded; the one with the lowest [quadAngleError] wins.
 *
 * @param imgWidth candidates need `0 <= x < imgWidth`
 * @param imgHeight candidates need `0 <= y < imgHeight`
 * @return the four vertices of the best candidate, or null when there is none or when
 * [points] has fewer than 4 points
 */
fun findQuadFromRightAngles(
    points: List<Point>,
    imgWidth: Int,
    imgHeight: Int,
    angleMin: Float = 60f,
    angleMax: Float = 120f
): List<Point>? {
    val n = points.size
    if (n < 4) return null

    // Cyclic access to the polygon vertices (valid for any index >= -n).
    fun vertex(index: Int): Point = points[(index + n) % n]

    val angles = List(n) { i -> orientedAngle(vertex(i - 1), vertex(i), vertex(i + 1)) }

    var bestQuad: List<Point>? = null
    var bestScore = Double.POSITIVE_INFINITY

    for (i in 0 until n) {
        val threeNearRightAngles = (0..2).all { offset -> angles[(i + offset) % n] in angleMin..angleMax }
        if (!threeNearRightAngles) continue

        val fourth = lineIntersection2(vertex(i - 1), vertex(i), vertex(i + 2), vertex(i + 3)) ?: continue
        val quad = listOf(vertex(i), vertex(i + 1), vertex(i + 2), fourth)

        // ensure inside image bounds
        val outsideImage = quad.any { it.x < 0 || it.x >= imgWidth || it.y < 0 || it.y >= imgHeight }
        if (outsideImage) continue

        // ensure convex
        if (!isConvex(quad)) continue

        val score = quadAngleError(quad)
        if (score < bestScore) {
            bestScore = score
            bestQuad = quad
        }
    }
    return bestQuad
}
|
||||
|
||||
/**
 * Unsigned angle between the vectors [v1] and [v2], in degrees (0..180).
 *
 * A tiny epsilon is added to each vector length so that a zero vector does not cause a
 * division by zero.
 */
fun angleBetween(v1: Point, v2: Point): Float {
    val epsilon = 1e-9f
    val length1 = sqrt(v1.x * v1.x + v1.y * v1.y) + epsilon
    val length2 = sqrt(v2.x * v2.x + v2.y * v2.y) + epsilon
    val cosine = ((v1.x * v2.x + v1.y * v2.y) / (length1 * length2)).coerceIn(-1.0, 1.0)
    return Math.toDegrees(acos(cosine)).toFloat()
}
|
||||
|
||||
/**
 * Oriented angle at vertex [b] between the vectors b->a and b->c, in degrees.
 *
 * The result is the unsigned angle when the cross product of the two vectors is non-negative,
 * and 360 minus that angle otherwise.
 */
fun orientedAngle(a: Point, b: Point, c: Point): Double {
    val toA = Point(a.x - b.x, a.y - b.y)
    val toC = Point(c.x - b.x, c.y - b.y)

    // The epsilon guards against a division by zero for coincident points.
    val lengthA = sqrt(toA.x * toA.x + toA.y * toA.y) + 1e-9f
    val lengthC = sqrt(toC.x * toC.x + toC.y * toC.y) + 1e-9f
    val cosine = ((toA.x * toC.x + toA.y * toC.y) / (lengthA * lengthC)).coerceIn(-1.0, 1.0)

    val unsignedAngle = Math.toDegrees(acos(cosine))
    val cross = toA.x * toC.y - toA.y * toC.x
    return if (cross < 0) 360.0 - unsignedAngle else unsignedAngle
}
|
||||
|
||||
/**
 * Intersection of the infinite line through [p1] and [p2] with the one through [p3] and [p4].
 *
 * @return the intersection point, or null when the lines are (almost) parallel
 */
fun lineIntersection2(p1: Point, p2: Point, p3: Point, p4: Point): Point? {
    val dx12 = p1.x - p2.x
    val dy12 = p1.y - p2.y
    val dx34 = p3.x - p4.x
    val dy34 = p3.y - p4.y

    val denom = dx12 * dy34 - dy12 * dx34
    if (abs(denom) < 1e-6f) return null

    val cross12 = p1.x * p2.y - p1.y * p2.x
    val cross34 = p3.x * p4.y - p3.y * p4.x
    return Point(
        (cross12 * dx34 - dx12 * cross34) / denom,
        (cross12 * dy34 - dy12 * cross34) / denom,
    )
}
|
||||
|
||||
/**
 * Sum, over the four corners of [quad], of the absolute difference between the corner angle
 * and 90 degrees. A perfect rectangle scores 0.
 */
fun quadAngleError(quad: List<Point>): Double =
    (0 until 4).sumOf { i ->
        val previous = quad[(i + 3) % 4]
        val corner = quad[i]
        val next = quad[(i + 1) % 4]
        val cornerAngle = angleBetween(
            Point(previous.x - corner.x, previous.y - corner.y),
            Point(next.x - corner.x, next.y - corner.y),
        )
        abs(cornerAngle - 90.0)
    }
|
||||
|
||||
/**
 * Tells whether the four-vertex polygon [quad] is convex, i.e. whether all its non-degenerate
 * turns go in the same direction. Turns with a zero cross product are ignored.
 *
 * @return false when [quad] does not have exactly four vertices or when it has turns in both
 * directions
 */
fun isConvex(quad: List<Point>): Boolean {
    if (quad.size != 4) return false

    val turnDirections = quad.indices
        .map { i ->
            val a = quad[i]
            val b = quad[(i + 1) % 4]
            val c = quad[(i + 2) % 4]
            val cross = (b.x - a.x) * (c.y - b.y) - (b.y - a.y) * (c.x - b.x)
            cross.sign.toInt()
        }
        .filter { it != 0 }

    return turnDirections.distinct().size <= 1
}
|
||||
|
||||
@@ -0,0 +1,60 @@
|
||||
/*
|
||||
* Copyright 2025 Pierre-Yves Nicolas
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
* You should have received a copy of the GNU General Public License along with
|
||||
* this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.fairscan.imageprocessing
|
||||
|
||||
import org.assertj.core.api.Assertions.assertThat
|
||||
import org.assertj.core.api.Assertions.assertThatThrownBy
|
||||
import org.junit.Test
|
||||
|
||||
/** Unit tests for the geometry primitives: [Line], [Quad] and [createQuad]. */
class GeometryTest {

    @Test
    fun line() {
        val horizontal = Line(Point(0, 0), Point(10, 0))
        val diagonal = Line(Point(1, 2), Point(4, 6))

        assertThat(horizontal.norm()).isEqualTo(10.0)
        assertThat(diagonal.norm()).isEqualTo(5.0)
    }

    @Test
    fun createQuad() {
        // Vertices given in arbitrary order must be assigned to the right corners.
        val shuffled = listOf(Point(3, 9), Point(1, 2), Point(11, 12), Point(10, 3))
        val expected = Quad(Point(1, 2), Point(10, 3), Point(11, 12), Point(3, 9))

        assertThat(createQuad(shuffled)).isEqualTo(expected)
        assertThatThrownBy { createQuad(listOf()) }
            .isInstanceOf(IllegalArgumentException::class.java)
    }

    @Test
    fun rotateQuad() {
        val quad = createQuad(listOf(Point(1, 2), Point(10, 3), Point(11, 12), Point(3, 9)))

        val quarterTurn = createQuad(listOf(Point(48, 1), Point(47, 10), Point(38, 11), Point(41, 3)))
        val halfTurn = createQuad(listOf(Point(99, 48), Point(90, 47), Point(89, 38), Point(97, 41)))
        val threeQuarterTurn = createQuad(listOf(Point(2, 99), Point(3, 90), Point(12, 89), Point(9, 97)))

        assertThat(quad.rotate90(1, 100, 50)).isEqualTo(quarterTurn)
        assertThat(quad.rotate90(2, 100, 50)).isEqualTo(halfTurn)
        assertThat(quad.rotate90(3, 100, 50)).isEqualTo(threeQuarterTurn)

        // Four quarter turns are a no-op, and the iteration count wraps around.
        assertThat(quad.rotate90(4, 100, 50)).isEqualTo(quad)
        assertThat(quad.rotate90(5, 100, 50)).isEqualTo(quad.rotate90(1, 100, 50))
    }
}
|
||||
Reference in New Issue
Block a user