Improve distinction between color and grayscale documents (#79)

Better differentiate color and grayscale documents:
- Look for colored pixels only in the intersection of the mask and quadrilateral
- Apply a white balance (gray world) to the document
- Exclude pixels with extreme luminance
- Erode segmentation mask
This commit is contained in:
pynicolas
2025-12-10 17:08:21 +01:00
committed by GitHub
parent 87433fa96a
commit fe91f3e241
10 changed files with 509 additions and 86 deletions

View File

@@ -1,5 +1,6 @@
/build
/dataset/images
/dataset/masks
/dataset/metadata
/python/venv
/reports

View File

@@ -0,0 +1,152 @@
/*
* Copyright 2025 Pierre-Yves Nicolas
*
* This program is free software: you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.fairscan.evaluation
import org.fairscan.imageprocessing.detectDocumentQuad
import org.fairscan.imageprocessing.extractDocument
import org.fairscan.imageprocessing.isColoredDocument
import org.fairscan.imageprocessing.scaledTo
import org.opencv.core.Mat
import org.opencv.imgcodecs.Imgcodecs
import java.io.File
/**
 * Command-line entry point: loads the OpenCV native library, then runs the
 * color detection evaluation.
 */
fun main() {
    // The evaluator calls into OpenCV (Imgcodecs), so the native library is loaded first.
    nu.pattern.OpenCV.loadLocally()
    ColorDetectionEvaluator.run()
}
/**
 * Evaluates color/grayscale detection against the labelled dataset stored under
 * `evaluation/dataset` and writes an HTML report to `evaluation/reports/color_detection`.
 */
object ColorDetectionEvaluator {

    /**
     * Runs the evaluation over every image listed in `metadata/images.csv`.
     *
     * An image is silently skipped when:
     * - its document has no entry in `metadata/documents.csv`,
     * - its image file or mask file does not exist,
     * - either file cannot be decoded (OpenCV returns an empty Mat),
     * - no document quadrilateral is detected from the mask.
     *
     * For every remaining image, the input image and the extracted document are written
     * next to the report, which is generated as `index.html`.
     */
    fun run() {
        val root = File("evaluation")
        val datasetDir = File(root, "dataset")
        val metadataDir = File(datasetDir, "metadata")
        // Derived from root so that the dataset and the reports always share the same base directory.
        val outputDir = File(root, "reports/color_detection").apply { mkdirs() }

        val imageMetas = CsvMetadata.readImagesCsv(File(metadataDir, "images.csv"))
        val documentMetas = CsvMetadata.readDocumentsCsv(File(metadataDir, "documents.csv"))

        val results = mutableListOf<ColorResult>()
        for (meta in imageMetas) {
            val expected = documentMetas[meta.docId]?.isColored ?: continue
            val imgName = meta.imgName.removeSuffix(".jpg")
            val result = evaluateImage(datasetDir, outputDir, imgName, expected) ?: continue
            results += result
        }
        println("Processed ${results.size} of ${imageMetas.size} images")

        ColorDetectionReport.writeHtml(
            File(outputDir, "index.html"),
            Score(results.groupingBy { it.colorCase }.eachCount()),
            results,
        )
    }

    /**
     * Runs detection on a single dataset image and stores the images used by the report.
     *
     * @param datasetDir directory containing the `images` and `masks` sub-directories
     * @param outputDir directory receiving `<imgName>_input.jpg` and `<imgName>_output.jpg`
     * @param imgName image name without the `.jpg` extension
     * @param expected ground-truth label: `true` when the document is colored
     * @return the evaluation result, or `null` when the image has to be skipped
     */
    private fun evaluateImage(
        datasetDir: File,
        outputDir: File,
        imgName: String,
        expected: Boolean,
    ): ColorResult? {
        val imgFile = File(datasetDir, "images/$imgName.jpg")
        val maskFile = File(datasetDir, "masks/$imgName.png")
        if (!imgFile.exists() || !maskFile.exists()) return null

        // Mats hold native memory: every Mat created here is released in the finally block,
        // whichever path leaves the function, so that memory does not pile up across the dataset.
        val nativeMats = mutableListOf<Mat>()
        try {
            val mat = Imgcodecs.imread(imgFile.absolutePath).also { nativeMats += it }
            if (mat.empty()) return null
            val maskMat = Imgcodecs.imread(maskFile.absolutePath, Imgcodecs.IMREAD_UNCHANGED)
                .also { nativeMats += it }
            if (maskMat.empty()) return null

            println("Processing ${imgName}...")
            val mask = MatMask(maskMat)
            // The quad is detected in mask coordinates, then rescaled to the input image size.
            val quad = detectDocumentQuad(mask, isLiveAnalysis = false)
                ?.scaledTo(mask.width, mask.height, mat.width(), mat.height())
                ?: return null
            val document: Mat = extractDocument(mat, quad, 0, mask).also { nativeMats += it }
            val detected = isColoredDocument(mat, mask, quad)

            val inputOut = File(outputDir, "${imgName}_input.jpg")
            Imgcodecs.imwrite(inputOut.absolutePath, mat)
            val outputOut = File(outputDir, "${imgName}_output.jpg")
            Imgcodecs.imwrite(outputOut.absolutePath, document)

            return ColorResult(
                imgName,
                originalFile = inputOut,
                documentFile = outputOut,
                colorCase = ColorCase(expected, detected),
            )
        } finally {
            nativeMats.forEach { it.release() }
        }
    }
}
/**
 * Outcome of color detection for one image: the labelled value next to the detected one.
 *
 * @property expected `true` when the dataset labels the document as colored
 * @property detected `true` when the detection classified the document as colored
 */
data class ColorCase(
    val expected: Boolean,
    val detected: Boolean,
) {
    /** `true` when the detection disagrees with the label. */
    val isMismatch: Boolean
        get() {
            return detected != expected
        }
}
/**
 * Evaluation result for a single image.
 *
 * @property imgName image name without its file extension
 * @property originalFile copy of the input image written for the report
 * @property documentFile extracted document image written for the report
 * @property colorCase expected and detected color flags for this image
 */
data class ColorResult(
    val imgName: String,
    val originalFile: File,
    val documentFile: File,
    val colorCase: ColorCase,
)
/**
 * Aggregated evaluation counts.
 *
 * @property byCase number of evaluated images for each expected/detected combination
 */
data class Score(
    val byCase: Map<ColorCase, Int>,
) {
    /** Number of evaluated images, all cases included. */
    val total: Int
        get() = byCase.values.sum()

    /** Number of images whose detected flag differs from the expected one. */
    val mismatchCount: Int
        get() = byCase.entries
            .filter { (colorCase, _) -> colorCase.isMismatch }
            .sumOf { it.value }

    /**
     * Fraction of images detected correctly, between 0.0 and 1.0.
     * Evaluates to NaN when [byCase] is empty, since the ratio is then 0/0.
     */
    val accuracy: Double
        get() {
            val errorRate = mismatchCount.toDouble() / total
            return 1.0 - errorRate
        }
}
/**
 * Renders the results of the color detection evaluation as a standalone HTML page.
 */
object ColorDetectionReport {

    /**
     * Writes the report to [output].
     *
     * The page shows the global figures, the count of each expected/detected combination,
     * and, for the two mismatching combinations, the input image next to the extracted
     * document. Images are referenced by file name only, so they are expected to sit in
     * the same directory as [output].
     *
     * @param output HTML file to create or overwrite
     * @param score aggregated counts shown at the top of the page
     * @param results per-image results; only the mismatches are displayed
     */
    fun writeHtml(output: File, score: Score, results: List<ColorResult>) {
        // With no evaluated image the accuracy is 0/0: show "n/a" rather than "NaN%".
        val accuracyText =
            if (score.total == 0) "n/a" else "${"%.2f".format(score.accuracy * 100)}%"

        val html = buildString {
            append("<html><body>")
            append("<h1>Color Detection Evaluation</h1>")
            append("<p>Total: ${score.total}</p>")
            append("<p>Mismatches: ${score.mismatchCount}</p>")
            append("<p>Accuracy: $accuracyText</p>")
            for ((case, count) in score.byCase) {
                append("<p>expectedColor=${case.expected} / detectedColor=${case.detected} : $count</p>")
            }
            // Only the failing combinations get a gallery.
            for (mismatch in listOf(ColorCase(true, false), ColorCase(false, true))) {
                append("<h2>expectedColor=${mismatch.expected} / detectedColor=${mismatch.detected}</h2>")
                for (result in results.filter { it.colorCase == mismatch }) {
                    append(imagePairHtml(result))
                }
            }
            append("</body></html>")
        }
        output.writeText(html)
    }

    /** Builds the HTML fragment showing the input image beside the extracted document. */
    private fun imagePairHtml(result: ColorResult): String {
        val originalSrc = escapeHtml(result.originalFile.name)
        val documentSrc = escapeHtml(result.documentFile.name)
        return """
            <div style="margin-bottom:20px;">
            <div style="display:flex; gap:20px;">
            <div><img width="300" src="$originalSrc" /></div>
            <div><img width="300" src="$documentSrc" /></div>
            </div>
            </div>
        """.trimIndent()
    }

    /** Escapes the characters that would break out of HTML text or a quoted attribute value. */
    private fun escapeHtml(text: String): String =
        text.replace("&", "&amp;") // must come first so that the entities added below stay intact
            .replace("<", "&lt;")
            .replace(">", "&gt;")
            .replace("\"", "&quot;")
}

View File

@@ -0,0 +1,54 @@
/*
* Copyright 2025 Pierre-Yves Nicolas
*
* This program is free software: you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.fairscan.evaluation
import java.io.File
/**
 * One row of the images CSV.
 *
 * @property imgName value of the first column
 * @property docId value of the second column: identifier of the document shown in the image
 */
data class ImageMeta(
    val imgName: String,
    val docId: String,
)

/**
 * One row of the documents CSV.
 *
 * @property docId value of the first column
 * @property isColored `true` when the second column is "TRUE", ignoring case
 */
data class DocumentMeta(
    val docId: String,
    val isColored: Boolean,
)

/**
 * Readers for the dataset metadata files.
 *
 * Both files are plain comma-separated text with a header line. Fields are split on
 * every comma (quoting is not supported) and trimmed. Blank lines are ignored.
 */
object CsvMetadata {

    /**
     * Reads the images CSV.
     *
     * @return one [ImageMeta] per data row, in file order
     * @throws IllegalArgumentException if a non-blank row has fewer than two columns
     */
    fun readImagesCsv(file: File): List<ImageMeta> =
        readRows(file).map { (imgName, docId) ->
            ImageMeta(imgName = imgName, docId = docId)
        }

    /**
     * Reads the documents CSV.
     *
     * @return the documents indexed by [DocumentMeta.docId]; when an id appears several
     * times, the last row wins
     * @throws IllegalArgumentException if a non-blank row has fewer than two columns
     */
    fun readDocumentsCsv(file: File): Map<String, DocumentMeta> =
        readRows(file)
            .map { (docId, colored) ->
                DocumentMeta(docId, isColored = colored.equals("TRUE", ignoreCase = true))
            }
            .associateBy { it.docId }

    /** Returns the trimmed columns of every non-blank data row, the header being skipped. */
    private fun readRows(file: File): List<List<String>> =
        file.readLines()
            .drop(1) // skip header
            .filter { it.isNotBlank() }
            .map { line ->
                val cols = line.split(',').map { it.trim() }
                require(cols.size >= 2) {
                    "Malformed row in ${file.name}: expected at least 2 columns but got '$line'"
                }
                cols
            }
}

View File

@@ -31,7 +31,7 @@ class MatMask(private val mat: Mat) : Mask {
override val width: Int get() = mat.width()
override val height: Int get() = mat.height()
override fun toMat(): Mat = mat
override fun toMat(): Mat = mat.clone()
}
object DatasetEvaluator {
@@ -71,7 +71,7 @@ object DatasetEvaluator {
?.scaledTo(mask.width, mask.height, inputMat.width(), inputMat.height())
val corrected: Mat? = if (quad != null) {
extractDocument(inputMat, quad = quad, rotationDegrees = 0)
extractDocument(inputMat, quad = quad, rotationDegrees = 0, mask)
} else null
val inputOut = File(outputDir, "${e.name}_input.jpg")