) {
+ val sb = StringBuilder()
+
+ sb.append("")
+ sb.append("Color Detection Evaluation
")
+ sb.append("Total: ${score.total}
")
+ sb.append("Mismatches: ${score.mismatchCount}
")
+ sb.append("Accuracy: ${"%.2f".format(score.accuracy * 100)}%
")
+
+ score.byCase.forEach { (case, count) ->
+ sb.append("expectedColor=${case.expected} / detectedColor=${case.detected} : $count
")
+ }
+
+ for (c in listOf(ColorCase(true, false), ColorCase(false, true))) {
+ sb.append("expectedColor=${c.expected} / detectedColor=${c.detected}
")
+ for (r in results.filter { it.colorCase == c }) {
+ sb.append(
+ """
+
+ """.trimIndent()
+ )
+ }
+ }
+
+ sb.append("")
+ output.writeText(sb.toString())
+ }
+}
+
+
diff --git a/evaluation/src/main/java/org/fairscan/evaluation/CsvMetadata.kt b/evaluation/src/main/java/org/fairscan/evaluation/CsvMetadata.kt
new file mode 100644
index 0000000..44a7ea3
--- /dev/null
+++ b/evaluation/src/main/java/org/fairscan/evaluation/CsvMetadata.kt
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2025 Pierre-Yves Nicolas
+ *
+ * This program is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+package org.fairscan.evaluation
+
+import java.io.File
+
+/**
+ * Associates an image name with a document id, as read by `CsvMetadata.readImagesCsv`.
+ *
+ * @property imgName value of the first CSV column
+ * @property docId value of the second CSV column
+ */
+data class ImageMeta(
+    val imgName: String,
+    val docId: String,
+)
+
+/**
+ * Associates a document id with its color flag, as read by `CsvMetadata.readDocumentsCsv`.
+ *
+ * @property docId value of the first CSV column
+ * @property isColored true when the second CSV column equals "TRUE" (case-insensitive)
+ */
+data class DocumentMeta(
+    val docId: String,
+    val isColored: Boolean,
+)
+
+/**
+ * Readers for the comma-separated metadata files of the evaluation dataset.
+ *
+ * Both readers skip the first line (header) and blank lines, split each remaining
+ * line on ',' and trim the columns they use. No quoting or escaping is supported.
+ */
+object CsvMetadata {
+
+    /**
+     * Reads one [ImageMeta] per data row: column 0 is the image name, column 1 the document id.
+     *
+     * @throws IndexOutOfBoundsException if a non-blank row has fewer than two columns
+     */
+    fun readImagesCsv(file: File): List<ImageMeta> =
+        dataRows(file).map { cols ->
+            ImageMeta(
+                imgName = cols[0].trim(),
+                docId = cols[1].trim(),
+            )
+        }
+
+    /**
+     * Reads one [DocumentMeta] per data row and indexes the result by document id.
+     * Column 0 is the document id; column 1 is the color flag, true only for "TRUE"
+     * (case-insensitive). If an id appears several times, the last row wins.
+     *
+     * @throws IndexOutOfBoundsException if a non-blank row has fewer than two columns
+     */
+    fun readDocumentsCsv(file: File): Map<String, DocumentMeta> =
+        dataRows(file)
+            .map { cols ->
+                DocumentMeta(
+                    docId = cols[0].trim(),
+                    isColored = cols[1].trim().equals("TRUE", ignoreCase = true),
+                )
+            }
+            .associateBy { it.docId }
+
+    /** Returns the columns of every non-blank line after the header line. */
+    private fun dataRows(file: File): List<List<String>> =
+        file.readLines()
+            .drop(1) // skip header
+            .filter { it.isNotBlank() } // a blank row would otherwise fail on cols[1]
+            .map { it.split(',') }
+}
diff --git a/evaluation/src/main/java/org/fairscan/evaluation/DatasetEvaluator.kt b/evaluation/src/main/java/org/fairscan/evaluation/DatasetEvaluator.kt
index 511f972..5f2f42c 100644
--- a/evaluation/src/main/java/org/fairscan/evaluation/DatasetEvaluator.kt
+++ b/evaluation/src/main/java/org/fairscan/evaluation/DatasetEvaluator.kt
@@ -31,7 +31,7 @@ class MatMask(private val mat: Mat) : Mask {
override val width: Int get() = mat.width()
override val height: Int get() = mat.height()
- override fun toMat(): Mat = mat
+ override fun toMat(): Mat = mat.clone()
}
object DatasetEvaluator {
@@ -71,7 +71,7 @@ object DatasetEvaluator {
?.scaledTo(mask.width, mask.height, inputMat.width(), inputMat.height())
val corrected: Mat? = if (quad != null) {
- extractDocument(inputMat, quad = quad, rotationDegrees = 0)
+ extractDocument(inputMat, quad = quad, rotationDegrees = 0, mask)
} else null
val inputOut = File(outputDir, "${e.name}_input.jpg")
diff --git a/imageprocessing/src/main/java/org/fairscan/imageprocessing/ColorDetection.kt b/imageprocessing/src/main/java/org/fairscan/imageprocessing/ColorDetection.kt
new file mode 100644
index 0000000..511393d
--- /dev/null
+++ b/imageprocessing/src/main/java/org/fairscan/imageprocessing/ColorDetection.kt
@@ -0,0 +1,236 @@
+/*
+ * Copyright 2025 Pierre-Yves Nicolas
+ *
+ * This program is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+package org.fairscan.imageprocessing
+
+import org.opencv.core.Core
+import org.opencv.core.CvType
+import org.opencv.core.CvType.CV_8UC1
+import org.opencv.core.Mat
+import org.opencv.core.Mat.zeros
+import org.opencv.core.MatOfPoint
+import org.opencv.core.Scalar
+import org.opencv.core.Size
+import org.opencv.imgproc.Imgproc
+import org.opencv.imgproc.Imgproc.fillConvexPoly
+import kotlin.math.roundToInt
+import kotlin.math.sqrt
+
+/**
+ * Tells whether the document found in [img] should be treated as colored.
+ *
+ * The image is downscaled, white-balanced inside the document area and converted to Lab.
+ * A pixel counts as colored when its chroma (magnitude of the a/b channels after
+ * subtracting 128) is above [chromaThreshold], its L value lies within
+ * [luminanceMin]..[luminanceMax] and it belongs to the document mask (mask ∩ quad).
+ *
+ * @param img BGR image; it is not modified
+ * @param mask segmentation mask of the document
+ * @param quad document corners, expressed in the coordinates of [img]
+ * @param chromaThreshold chroma above which a pixel is considered colored
+ * @param proportionThreshold proportion of colored pixels (relative to the document mask)
+ * that must be exceeded for the document to be reported as colored
+ * @param luminanceMin lower bound (inclusive) on the L channel
+ * @param luminanceMax upper bound (inclusive) on the L channel
+ * @return true when the proportion of colored pixels is strictly greater than
+ * [proportionThreshold]; false when the document mask is empty
+ */
+fun isColoredDocument(
+    img: Mat,
+    mask: Mask,
+    quad: Quad,
+    chromaThreshold: Double = 17.5,
+    proportionThreshold: Double = 0.0003,
+    luminanceMin: Double = 40.0,
+    luminanceMax: Double = 180.0
+): Boolean {
+
+    // Work on a reasonable size (for correct performance)
+    val resizedImg = resizeForMaxPixels(img, 1024.0 * 768.0)
+    val workSize = resizedImg.size()
+
+    // 1) Compute doc mask (mask ∩ quad)
+    val docMask = documentMask(mask, quad, img.size(), workSize)
+    val totalPixels = Core.countNonZero(docMask)
+    if (totalPixels == 0) {
+        // Nothing inside the document: no pixel can be colored, so skip the remaining work
+        resizedImg.release()
+        docMask.release()
+        return false
+    }
+
+    // 2) Apply white balance only inside document
+    val whiteBalanced = applyGrayWorldToDocument(resizedImg, docMask)
+
+    // 3) Convert to Lab, see https://en.wikipedia.org/wiki/CIELAB_color_space
+    val lab = Mat()
+    Imgproc.cvtColor(whiteBalanced, lab, Imgproc.COLOR_BGR2Lab)
+
+    // 4) Split Lab
+    val channels = ArrayList<Mat>()
+    Core.split(lab, channels)
+    val luminance = channels[0]
+    val a = channels[1]
+    val b = channels[2]
+
+    // 5) Compute chroma and keep the pixels above the threshold
+    val chroma = chroma(a, b)
+
+    val colorMask = Mat()
+    Imgproc.threshold(chroma, colorMask, chromaThreshold, 255.0, Imgproc.THRESH_BINARY)
+    // threshold() keeps the float type of chroma; bitwise_and below needs 8-bit masks
+    colorMask.convertTo(colorMask, CvType.CV_8U)
+
+    // 6) Create luminance mask L ∈ [luminanceMin, luminanceMax]
+    val luminanceMask = Mat()
+    Core.inRange(luminance, Scalar(luminanceMin), Scalar(luminanceMax), luminanceMask)
+
+    // 7) Combine colorMask & luminanceMask & docMask
+    val tmp = Mat()
+    Core.bitwise_and(colorMask, luminanceMask, tmp)
+
+    val restrictedMask = Mat()
+    Core.bitwise_and(tmp, docMask, restrictedMask)
+
+    val coloredPixels = Core.countNonZero(restrictedMask)
+
+    // 8) Cleanup
+    resizedImg.release()
+    whiteBalanced.release()
+    lab.release()
+    channels.forEach { it.release() }
+    chroma.release()
+    colorMask.release()
+    luminanceMask.release()
+    tmp.release()
+    restrictedMask.release()
+    docMask.release()
+
+    val proportion = coloredPixels.toDouble() / totalPixels.toDouble()
+    return proportion > proportionThreshold
+}
+
+/**
+ * Returns a new Mat holding the content of [img] with at most about [maxPixels] pixels.
+ *
+ * When [img] is already small enough, a clone is returned. Otherwise both dimensions are
+ * multiplied by the same factor and the image is resampled with INTER_AREA.
+ * The returned Mat is always distinct from [img].
+ */
+private fun resizeForMaxPixels(img: Mat, maxPixels: Double): Mat {
+    // Count pixels in floating point: an Int product wraps around for very large images,
+    // which would make them look small enough and skip the downscaling
+    val origPixels = img.width().toDouble() * img.height().toDouble()
+    if (origPixels <= maxPixels) {
+        return img.clone()
+    }
+    val scale = sqrt(maxPixels / origPixels)
+    val size = Size(img.width() * scale, img.height() * scale)
+    val resizedImg = Mat()
+    Imgproc.resize(img, resizedImg, size, 0.0, 0.0, Imgproc.INTER_AREA)
+    return resizedImg
+}
+
+/**
+ * Computes the per-pixel chroma from the [a] and [b] channels: the magnitude of
+ * (a - 128, b - 128), evaluated in 32-bit float.
+ *
+ * The inputs are left untouched; the returned Mat is a new one.
+ */
+private fun chroma(a: Mat, b: Mat): Mat {
+    // Converts a channel to float and subtracts the 128 offset
+    fun centered(channel: Mat): Mat {
+        val asFloat = Mat()
+        channel.convertTo(asFloat, CvType.CV_32F)
+        val shifted = Mat()
+        Core.subtract(asFloat, Scalar(128.0), shifted)
+        asFloat.release()
+        return shifted
+    }
+
+    val aCentered = centered(a)
+    val bCentered = centered(b)
+
+    val magnitude = Mat()
+    Core.magnitude(aCentered, bCentered, magnitude)
+
+    aCentered.release()
+    bCentered.release()
+    return magnitude
+}
+
+/**
+ * Erodes [mask] with an elliptical kernel whose size depends on [quad].
+ *
+ * The kernel size is 2% of the shortest edge of [quad], clamped to 3..15 and
+ * bumped to the next odd value when even. The result is a new Mat; [mask] is not modified.
+ */
+private fun erodeBorder(mask: Mat, quad: Quad): Mat {
+    val shortestEdge = quad.edges().minOf { it.norm() }
+    val clamped = (shortestEdge * 0.02).roundToInt().coerceIn(3, 15)
+    val kernelSize = if (clamped % 2 == 0) clamped + 1 else clamped
+    val side = kernelSize.toDouble()
+
+    val kernel = Imgproc.getStructuringElement(Imgproc.MORPH_ELLIPSE, Size(side, side))
+    val eroded = Mat()
+    Imgproc.morphologyEx(mask, eroded, Imgproc.MORPH_ERODE, kernel)
+    kernel.release()
+    return eroded
+}
+
+/**
+ * Builds the document mask at working resolution: the eroded segmentation mask
+ * intersected with the filled [quad].
+ *
+ * @param mask segmentation mask; the Mat obtained from `toMat()` is released here
+ * @param quad document corners, expressed at [origSize]
+ * @param origSize size the quad coordinates refer to
+ * @param workSize size of the returned mask
+ * @return a new Mat
+ */
+private fun documentMask(
+    mask: Mask,
+    quad: Quad,
+    origSize: Size,
+    workSize: Size,
+): Mat {
+    // Bring the segmentation mask to the working resolution
+    val sourceMask = mask.toMat()
+    val scaledMask = Mat()
+    Imgproc.resize(sourceMask, scaledMask, workSize, 0.0, 0.0, Imgproc.INTER_AREA)
+
+    // Same for the quad, then erode the mask border
+    val scaledQuad = quad.scaledTo(
+        origSize.width, origSize.height, workSize.width, workSize.height
+    )
+    val shrunkMask = erodeBorder(scaledMask, scaledQuad)
+
+    // Rasterize the quad as a filled polygon
+    val corners = MatOfPoint(
+        scaledQuad.topLeft.toCv(),
+        scaledQuad.topRight.toCv(),
+        scaledQuad.bottomRight.toCv(),
+        scaledQuad.bottomLeft.toCv(),
+    )
+    val quadArea = zeros(shrunkMask.size(), CV_8UC1)
+    fillConvexPoly(quadArea, corners, Scalar(255.0))
+
+    val intersection = Mat()
+    Core.bitwise_and(shrunkMask, quadArea, intersection)
+
+    listOf(quadArea, corners, shrunkMask, scaledMask, sourceMask).forEach { it.release() }
+
+    return intersection
+}
+
+/**
+ * Applies a gray-world white balance to the pixels of [img] selected by [docMask].
+ *
+ * The per-channel means are computed over the masked pixels only; each channel is then
+ * scaled so that its mean matches the average of the three means. Pixels outside the
+ * mask are copied unchanged.
+ *
+ * @param img image of type CV_8UC3; it is not modified
+ * @param docMask mask selecting the document pixels (non-zero = selected); it stays owned
+ * by the caller and is never released here
+ * @return a new Mat; a plain clone of [img] when [docMask] selects no pixel
+ * @throws IllegalArgumentException if [img] is not of type CV_8UC3
+ */
+fun applyGrayWorldToDocument(
+    img: Mat,
+    docMask: Mat,
+): Mat {
+    require(img.type() == CvType.CV_8UC3)
+
+    // Empty mask: nothing to balance. docMask is deliberately left alone, the caller
+    // still uses it (and releases it) after this call.
+    if (Core.countNonZero(docMask) == 0) {
+        return img.clone()
+    }
+
+    // compute mean per channel on docMask (B,G,R)
+    val means = Core.mean(img, docMask).`val`
+
+    // safety: avoid division by very small values
+    val eps = 1e-6
+    val meanB = means[0].coerceAtLeast(eps)
+    val meanG = means[1].coerceAtLeast(eps)
+    val meanR = means[2].coerceAtLeast(eps)
+    val meanGray = (meanB + meanG + meanR) / 3.0
+
+    // per-channel gains, in BGR order
+    val scales = Scalar(meanGray / meanB, meanGray / meanG, meanGray / meanR)
+
+    // scale the full image in float, then go back to 8U
+    val imgF = Mat()
+    img.convertTo(imgF, CvType.CV_32FC3)
+
+    val scaledF = Mat()
+    Core.multiply(imgF, scales, scaledF)
+
+    val scaled8 = Mat()
+    scaledF.convertTo(scaled8, CvType.CV_8UC3)
+
+    // result = original copy, then copy scaled pixels where docMask != 0
+    val result = img.clone()
+    scaled8.copyTo(result, docMask)
+
+    // cleanup
+    imgF.release()
+    scaledF.release()
+    scaled8.release()
+
+    return result
+}
diff --git a/imageprocessing/src/main/java/org/fairscan/imageprocessing/DocumentDetection.kt b/imageprocessing/src/main/java/org/fairscan/imageprocessing/DocumentDetection.kt
index 6078447..996eb02 100644
--- a/imageprocessing/src/main/java/org/fairscan/imageprocessing/DocumentDetection.kt
+++ b/imageprocessing/src/main/java/org/fairscan/imageprocessing/DocumentDetection.kt
@@ -18,7 +18,6 @@ import org.fairscan.imageprocessing.quad.detectDocumentQuadFromProbmap
import org.fairscan.imageprocessing.quad.findQuadFromRightAngles
import org.fairscan.imageprocessing.quad.minAreaRect
import org.opencv.core.Core
-import org.opencv.core.CvType
import org.opencv.core.Mat
import org.opencv.core.MatOfPoint
import org.opencv.core.MatOfPoint2f
@@ -62,10 +61,7 @@ fun detectDocumentQuad(mask: Mask, isLiveAnalysis: Boolean, minQuadAreaRatio: Do
}
private fun biggestContour(mat: Mat): Pair {
- val mat8u = Mat()
- mat.convertTo(mat8u, CvType.CV_8UC1, 255.0)
-
- val refinedMask = refineMask(mat8u)
+ val refinedMask = refineMask(mat)
val blurred = Mat()
Imgproc.GaussianBlur(refinedMask, blurred, Size(5.0, 5.0), 0.0)
@@ -116,7 +112,12 @@ fun refineMask(original: Mat): Mat {
return opened
}
-fun extractDocument(inputMat: Mat, quad: Quad, rotationDegrees: Int): Mat {
+fun extractDocument(
+ inputMat: Mat,
+ quad: Quad,
+ rotationDegrees: Int,
+ mask: Mask,
+): Mat {
val widthTop = norm(quad.topLeft, quad.topRight)
val widthBottom = norm(quad.bottomLeft, quad.bottomRight)
val targetWidth = (widthTop + widthBottom) / 2
@@ -144,7 +145,8 @@ fun extractDocument(inputMat: Mat, quad: Quad, rotationDegrees: Int): Mat {
Imgproc.warpPerspective(inputMat, outputMat, transform, outputSize)
val resized = resize(outputMat, 1500.0)
- val enhanced = enhanceCapturedImage(resized)
+ val isColored = isColoredDocument(inputMat, mask, quad)
+ val enhanced = enhanceCapturedImage(resized, isColored)
val rotated = rotate(enhanced, rotationDegrees)
return rotated
diff --git a/imageprocessing/src/main/java/org/fairscan/imageprocessing/Geometry.kt b/imageprocessing/src/main/java/org/fairscan/imageprocessing/Geometry.kt
index ebd8aa9..f0af04b 100644
--- a/imageprocessing/src/main/java/org/fairscan/imageprocessing/Geometry.kt
+++ b/imageprocessing/src/main/java/org/fairscan/imageprocessing/Geometry.kt
@@ -81,9 +81,9 @@ fun createQuad(vertices: List): Quad {
return Quad(sorted[0], sorted[1], sorted[2], sorted[3])
}
-fun Quad.scaledTo(fromWidth: Int, fromHeight: Int, toWidth: Int, toHeight: Int): Quad {
- val scaleX = toWidth.toFloat() / fromWidth
- val scaleY = toHeight.toFloat() / fromHeight
+fun Quad.scaledTo(fromWidth: Double, fromHeight: Double, toWidth: Double, toHeight: Double): Quad {
+ val scaleX = toWidth / fromWidth
+ val scaleY = toHeight / fromHeight
return Quad(
topLeft = topLeft.scaled(scaleX, scaleY),
topRight = topRight.scaled(scaleX, scaleY),
@@ -92,6 +92,14 @@ fun Quad.scaledTo(fromWidth: Int, fromHeight: Int, toWidth: Int, toHeight: Int):
)
}
-fun Point.scaled(scaleX: Float, scaleY: Float): Point {
+/** Integer-size convenience overload: delegates to the Double-based [scaledTo]. */
+fun Quad.scaledTo(fromWidth: Int, fromHeight: Int, toWidth: Int, toHeight: Int): Quad =
+    scaledTo(
+        fromWidth = fromWidth.toDouble(),
+        fromHeight = fromHeight.toDouble(),
+        toWidth = toWidth.toDouble(),
+        toHeight = toHeight.toDouble(),
+    )
+
+fun Point.scaled(scaleX: Double, scaleY: Double): Point {
return Point((x * scaleX), (y * scaleY))
}
diff --git a/imageprocessing/src/main/java/org/fairscan/imageprocessing/PostProcessing.kt b/imageprocessing/src/main/java/org/fairscan/imageprocessing/PostProcessing.kt
index da09e42..325afc5 100644
--- a/imageprocessing/src/main/java/org/fairscan/imageprocessing/PostProcessing.kt
+++ b/imageprocessing/src/main/java/org/fairscan/imageprocessing/PostProcessing.kt
@@ -22,8 +22,8 @@ import org.opencv.core.Size
import org.opencv.imgproc.Imgproc
import kotlin.math.max
-fun enhanceCapturedImage(img: Mat): Mat {
- return if (isColoredDocument(img)) {
+fun enhanceCapturedImage(img: Mat, isColored: Boolean): Mat {
+ return if (isColored) {
val result = Mat()
Core.convertScaleAbs(img, result, 1.2, 10.0)
result
@@ -36,63 +36,6 @@ fun enhanceCapturedImage(img: Mat): Mat {
}
}
-fun isColoredDocument(
- img: Mat,
- chromaThreshold: Double = 20.0,
- proportionThreshold: Double = 0.001
-): Boolean {
- val lab = Mat()
- Imgproc.cvtColor(img, lab, Imgproc.COLOR_BGR2Lab)
-
- val channels = ArrayList()
- Core.split(lab, channels)
- val a = channels[1]
- val b = channels[2]
-
- val aFloat = Mat()
- val bFloat = Mat()
- a.convertTo(aFloat, CvType.CV_32F)
- b.convertTo(bFloat, CvType.CV_32F)
-
- val aShifted = Mat()
- val bShifted = Mat()
- Core.subtract(aFloat, Scalar(128.0), aShifted)
- Core.subtract(bFloat, Scalar(128.0), bShifted)
-
- val aSq = Mat()
- val bSq = Mat()
- Core.multiply(aShifted, aShifted, aSq)
- Core.multiply(bShifted, bShifted, bSq)
-
- val sumSq = Mat()
- Core.add(aSq, bSq, sumSq)
-
- val chroma = Mat()
- Core.sqrt(sumSq, chroma)
-
- val mask = Mat()
- Imgproc.threshold(chroma, mask, chromaThreshold, 1.0, Imgproc.THRESH_BINARY)
- val coloredPixels = Core.countNonZero(mask)
-
- val totalPixels = chroma.rows() * chroma.cols()
- val proportion = coloredPixels.toDouble() / totalPixels.toDouble()
-
- lab.release()
- channels.forEach { it.release() }
- aFloat.release()
- bFloat.release()
- aShifted.release()
- bShifted.release()
- aSq.release()
- bSq.release()
- sumSq.release()
- chroma.release()
- mask.release()
-
- return proportion > proportionThreshold
-}
-
-
private fun multiScaleRetinex(img: Mat): Mat {
val imageSize = img.size()
val maxDim = max(imageSize.width, imageSize.height)