Improve distinction between color and grayscale documents (#79)
Better differentiate color and grayscale documents:
- Look for colored pixels only in the intersection of the mask and the quadrilateral
- Apply a white balance (gray world) to the document
- Exclude pixels with extreme luminance
- Erode the segmentation mask
This commit is contained in:
1
evaluation/.gitignore
vendored
1
evaluation/.gitignore
vendored
@@ -1,5 +1,6 @@
|
||||
/build
|
||||
/dataset/images
|
||||
/dataset/masks
|
||||
/dataset/metadata
|
||||
/python/venv
|
||||
/reports
|
||||
|
||||
@@ -0,0 +1,152 @@
|
||||
/*
|
||||
* Copyright 2025 Pierre-Yves Nicolas
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
* You should have received a copy of the GNU General Public License along with
|
||||
* this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.fairscan.evaluation
|
||||
|
||||
import org.fairscan.imageprocessing.detectDocumentQuad
|
||||
import org.fairscan.imageprocessing.extractDocument
|
||||
import org.fairscan.imageprocessing.isColoredDocument
|
||||
import org.fairscan.imageprocessing.scaledTo
|
||||
import org.opencv.core.Mat
|
||||
import org.opencv.imgcodecs.Imgcodecs
|
||||
import java.io.File
|
||||
|
||||
/**
 * Command-line entry point of the color-detection evaluation.
 */
fun main() {
    // OpenCV must be loaded before the evaluator touches any OpenCV class.
    nu.pattern.OpenCV.loadLocally()
    // Evaluate the dataset and write the HTML report.
    ColorDetectionEvaluator.run()
}
|
||||
|
||||
/**
 * Runs color/grayscale detection on the annotated images of the evaluation dataset
 * and writes an HTML report comparing the detected result with the expected one.
 */
object ColorDetectionEvaluator {

    /**
     * Evaluates every image listed in `evaluation/dataset/metadata/images.csv`.
     *
     * An image is skipped when its document has no entry in `documents.csv`, when its image
     * or mask file is missing or cannot be decoded, or when no document quadrilateral is
     * detected. For every other image, the input image and the extracted document are
     * written to `evaluation/reports/color_detection`, next to the generated `index.html`.
     */
    fun run() {
        val root = File("evaluation")
        val datasetDir = File(root, "dataset")
        val metadataDir = File(datasetDir, "metadata")
        val outputDir = File(root, "reports/color_detection").apply { mkdirs() }

        val imageMetas = CsvMetadata.readImagesCsv(File(metadataDir, "images.csv"))
        val documentMetas = CsvMetadata.readDocumentsCsv(File(metadataDir, "documents.csv"))

        val results = imageMetas.mapNotNull { meta ->
            // Images whose document has no annotation cannot be scored.
            documentMetas[meta.docId]?.let { doc ->
                evaluateImage(datasetDir, outputDir, meta.imgName.removeSuffix(".jpg"), doc.isColored)
            }
        }

        ColorDetectionReport.writeHtml(
            File(outputDir, "index.html"),
            Score(results.groupingBy { it.colorCase }.eachCount()),
            results,
        )
    }

    /**
     * Evaluates a single image and writes its input/output pictures to [outputDir].
     *
     * @param imgName image name without the `.jpg` extension
     * @param expected whether the document is annotated as colored
     * @return the evaluation result, or `null` when the image has to be skipped
     */
    private fun evaluateImage(
        datasetDir: File,
        outputDir: File,
        imgName: String,
        expected: Boolean,
    ): ColorResult? {
        val imgFile = File(datasetDir, "images/$imgName.jpg")
        val maskFile = File(datasetDir, "masks/$imgName.png")
        if (!imgFile.exists() || !maskFile.exists()) return null

        // Mats hold native memory: release them explicitly instead of waiting for the GC,
        // since this runs once per image of the dataset.
        val ownedMats = mutableListOf<Mat>()
        try {
            val mat = Imgcodecs.imread(imgFile.absolutePath).also { ownedMats += it }
            if (mat.empty()) return null

            val maskMat = Imgcodecs.imread(maskFile.absolutePath, Imgcodecs.IMREAD_UNCHANGED)
                .also { ownedMats += it }
            if (maskMat.empty()) return null

            println("Processing ${imgName}...")

            val mask = MatMask(maskMat)

            // The quad is detected in mask coordinates, then scaled to the image size.
            val quad = detectDocumentQuad(mask, isLiveAnalysis = false)
                ?.scaledTo(mask.width, mask.height, mat.width(), mat.height())
                ?: return null

            val document: Mat = extractDocument(mat, quad, 0, mask).also { ownedMats += it }
            val detected = isColoredDocument(mat, mask, quad)

            val inputOut = File(outputDir, "${imgName}_input.jpg")
            Imgcodecs.imwrite(inputOut.absolutePath, mat)

            val outputOut = File(outputDir, "${imgName}_output.jpg")
            Imgcodecs.imwrite(outputOut.absolutePath, document)

            return ColorResult(
                imgName,
                originalFile = inputOut,
                documentFile = outputOut,
                colorCase = ColorCase(expected, detected),
            )
        } finally {
            ownedMats.forEach { it.release() }
        }
    }
}
|
||||
|
||||
/**
 * Outcome of color detection for one image: the annotated value and the detected one.
 *
 * @property expected whether the document is expected to be colored
 * @property detected whether the document was detected as colored
 */
data class ColorCase(
    val expected: Boolean,
    val detected: Boolean,
) {
    /** `true` when the detection disagrees with the expectation. */
    val isMismatch: Boolean
        get() = expected xor detected
}
|
||||
|
||||
/**
 * Result of the color detection for a single image.
 *
 * @property imgName name of the evaluated image
 * @property originalFile file holding the input image
 * @property documentFile file holding the extracted document
 * @property colorCase expected and detected color status
 */
data class ColorResult(
    val imgName: String,
    val originalFile: File,
    val documentFile: File,
    val colorCase: ColorCase,
)
|
||||
|
||||
/**
 * Aggregated evaluation score.
 *
 * @property byCase number of evaluated images for each (expected, detected) combination
 */
data class Score(
    val byCase: Map<ColorCase, Int>,
) {
    /** Total number of evaluated images. */
    val total: Int get() = byCase.values.sum()

    /** Number of images whose detected color status differs from the expected one. */
    val mismatchCount: Int get() = byCase.filterKeys { it.isMismatch }.values.sum()

    /**
     * Fraction of images that were classified correctly, in `[0.0, 1.0]`.
     *
     * Returns `0.0` when nothing was evaluated, rather than the `NaN` that
     * dividing by a zero total would produce.
     */
    val accuracy: Double
        get() = if (total == 0) 0.0 else 1.0 - mismatchCount.toDouble() / total
}
|
||||
|
||||
/** Generates the HTML report of the color-detection evaluation. */
object ColorDetectionReport {

    /**
     * Writes the report to [output].
     *
     * The page shows the global figures of [score], the count of each
     * (expected, detected) combination, and the input/output pictures of every
     * mismatching entry of [results]. Pictures are referenced by file name only.
     */
    fun writeHtml(output: File, score: Score, results: List<ColorResult>) {
        val html = buildString {
            append("<html><body>")
            append("<h1>Color Detection Evaluation</h1>")
            append("<p>Total: ${score.total}</p>")
            append("<p>Mismatches: ${score.mismatchCount}</p>")
            append("<p>Accuracy: ${"%.2f".format(score.accuracy * 100)}%</p>")

            for ((case, count) in score.byCase) {
                append("<p>expectedColor=${case.expected} / detectedColor=${case.detected} : $count</p>")
            }

            // Only the two mismatching combinations get a picture gallery.
            val mismatchCases = listOf(ColorCase(true, false), ColorCase(false, true))
            for (mismatch in mismatchCases) {
                append("<h2>expectedColor=${mismatch.expected} / detectedColor=${mismatch.detected}</h2>")
                results
                    .filter { it.colorCase == mismatch }
                    .forEach { result ->
                        append(
                            """
                            <div style="margin-bottom:20px;">
                                <div style="display:flex; gap:20px;">
                                    <div><img width="300" src="${result.originalFile.name}" /></div>
                                    <div><img width="300" src="${result.documentFile.name}" /></div>
                                </div>
                            </div>
                            """.trimIndent()
                        )
                    }
            }

            append("</body></html>")
        }
        output.writeText(html)
    }
}
|
||||
|
||||
|
||||
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
* Copyright 2025 Pierre-Yves Nicolas
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
* You should have received a copy of the GNU General Public License along with
|
||||
* this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.fairscan.evaluation
|
||||
|
||||
import java.io.File
|
||||
|
||||
/**
 * One row of the images CSV file.
 *
 * @property imgName name of the image
 * @property docId identifier of the document shown in the image
 */
data class ImageMeta(
    val imgName: String,
    val docId: String,
)
|
||||
|
||||
/**
 * One row of the documents CSV file.
 *
 * @property docId identifier of the document
 * @property isColored whether the document is annotated as colored
 */
data class DocumentMeta(
    val docId: String,
    val isColored: Boolean,
)
|
||||
|
||||
/**
 * Reads the CSV metadata files of the evaluation dataset.
 *
 * Both files are expected to start with a header line and to use `,` as the separator.
 * Quoted fields are not supported. Blank lines are ignored.
 */
object CsvMetadata {

    /**
     * Reads the images file: first column is the image name, second column the document id.
     *
     * @throws IndexOutOfBoundsException if a non-blank row has fewer than two columns
     */
    fun readImagesCsv(file: File): List<ImageMeta> =
        readDataRows(file).map { cols ->
            ImageMeta(
                imgName = cols[0],
                docId = cols[1],
            )
        }

    /**
     * Reads the documents file, indexed by document id: first column is the document id,
     * second column is `TRUE` (case-insensitive) for colored documents. Any other value
     * means not colored. If a document id appears several times, the last row wins.
     *
     * @throws IndexOutOfBoundsException if a non-blank row has fewer than two columns
     */
    fun readDocumentsCsv(file: File): Map<String, DocumentMeta> =
        readDataRows(file)
            .map { cols ->
                DocumentMeta(
                    docId = cols[0],
                    isColored = cols[1].equals("TRUE", ignoreCase = true),
                )
            }
            .associateBy { it.docId }

    /** Returns the trimmed columns of every non-blank line after the header. */
    private fun readDataRows(file: File): List<List<String>> =
        file.readLines()
            .drop(1) // skip header
            // Blank lines (e.g. extra newlines at the end of the file) carry no data
            // and would otherwise fail on the access to the second column.
            .filter { it.isNotBlank() }
            .map { line -> line.split(',').map { it.trim() } }
}
|
||||
@@ -31,7 +31,7 @@ class MatMask(private val mat: Mat) : Mask {
|
||||
override val width: Int get() = mat.width()
|
||||
override val height: Int get() = mat.height()
|
||||
|
||||
override fun toMat(): Mat = mat
|
||||
override fun toMat(): Mat = mat.clone()
|
||||
}
|
||||
|
||||
object DatasetEvaluator {
|
||||
@@ -71,7 +71,7 @@ object DatasetEvaluator {
|
||||
?.scaledTo(mask.width, mask.height, inputMat.width(), inputMat.height())
|
||||
|
||||
val corrected: Mat? = if (quad != null) {
|
||||
extractDocument(inputMat, quad = quad, rotationDegrees = 0)
|
||||
extractDocument(inputMat, quad = quad, rotationDegrees = 0, mask)
|
||||
} else null
|
||||
|
||||
val inputOut = File(outputDir, "${e.name}_input.jpg")
|
||||
|
||||
Reference in New Issue
Block a user