Improve distinction between color and grayscale documents (#79)
Better differentiate color and grayscale documents: - Look for colored pixels only in the intersection of the mask and quadrilateral - Apply a white balance (grey world) to the document - Exclude pixels with extreme luminance - Erode segmentation mask
This commit is contained in:
@@ -151,9 +151,17 @@ class ImageSegmentationService(private val context: Context, private val logger:
|
||||
}
|
||||
|
||||
override fun toMat(): Mat {
|
||||
val mat = Mat(height, width, CvType.CV_32FC1)
|
||||
mat.put(0, 0, probmap)
|
||||
return mat
|
||||
val threshold = 0.5f
|
||||
|
||||
val mask = Mat(height, width, CvType.CV_8UC1)
|
||||
val data = ByteArray(width * height)
|
||||
|
||||
for (i in probmap.indices) {
|
||||
data[i] = if (probmap[i] >= threshold) 255.toByte() else 0.toByte()
|
||||
}
|
||||
|
||||
mask.put(0, 0, data)
|
||||
return mask
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -31,12 +31,15 @@ import kotlinx.coroutines.flow.map
|
||||
import kotlinx.coroutines.launch
|
||||
import kotlinx.coroutines.withContext
|
||||
import org.fairscan.app.AppContainer
|
||||
import org.fairscan.imageprocessing.Mask
|
||||
import org.fairscan.imageprocessing.Quad
|
||||
import org.fairscan.imageprocessing.detectDocumentQuad
|
||||
import org.fairscan.imageprocessing.extractDocument
|
||||
import org.fairscan.imageprocessing.scaledTo
|
||||
import org.opencv.android.Utils
|
||||
import org.opencv.core.CvType
|
||||
import org.opencv.core.Mat
|
||||
import org.opencv.imgproc.Imgproc
|
||||
import java.io.ByteArrayOutputStream
|
||||
|
||||
sealed interface CameraEvent {
|
||||
@@ -130,6 +133,7 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
|
||||
if (segmentation != null) {
|
||||
val mask = segmentation.segmentation
|
||||
var quad = detectDocumentQuad(mask, isLiveAnalysis = false)
|
||||
val rotationDegrees = imageProxy.imageInfo.rotationDegrees
|
||||
if (quad == null) {
|
||||
val now = System.currentTimeMillis()
|
||||
lastSuccessfulLiveAnalysisState?.timestamp?.let {
|
||||
@@ -139,7 +143,7 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
|
||||
val recentLive = lastSuccessfulLiveAnalysisState?.takeIf {
|
||||
now - it.timestamp <= 1500
|
||||
}
|
||||
val rotations = (-imageProxy.imageInfo.rotationDegrees / 90) + 4
|
||||
val rotations = (-rotationDegrees / 90) + 4
|
||||
quad = recentLive?.documentQuad?.rotate90(rotations, mask.width, mask.height)
|
||||
if (quad != null) {
|
||||
Log.i("Quad", "Using quad taken in live analysis; rotations=$rotations")
|
||||
@@ -147,7 +151,7 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
|
||||
}
|
||||
if (quad != null) {
|
||||
val resizedQuad = quad.scaledTo(mask.width, mask.height, bitmap.width, bitmap.height)
|
||||
corrected = extractDocumentFromBitmap(bitmap, resizedQuad, imageProxy.imageInfo.rotationDegrees)
|
||||
corrected = extractDocumentFromBitmap(bitmap, resizedQuad, rotationDegrees, mask)
|
||||
}
|
||||
}
|
||||
return@withContext corrected
|
||||
@@ -180,18 +184,33 @@ sealed class CaptureState {
|
||||
data class CaptureError(override val frozenImage: Bitmap) : CaptureState()
|
||||
data class CapturePreview(
|
||||
override val frozenImage: Bitmap,
|
||||
val processed: Bitmap
|
||||
val processed: Bitmap,
|
||||
) : CaptureState()
|
||||
}
|
||||
|
||||
fun extractDocumentFromBitmap(originalBitmap: Bitmap, quad: Quad, rotationDegrees: Int): Bitmap {
|
||||
val inputMat = Mat()
|
||||
Utils.bitmapToMat(originalBitmap, inputMat)
|
||||
return toBitmap(extractDocument(inputMat, quad, rotationDegrees))
|
||||
fun extractDocumentFromBitmap(image: Bitmap, quad: Quad, rotationDegrees: Int, mask: Mask): Bitmap {
|
||||
val rgba = Mat()
|
||||
Utils.bitmapToMat(image, rgba)
|
||||
val bgr = Mat()
|
||||
Imgproc.cvtColor(rgba, bgr, Imgproc.COLOR_RGBA2BGR) // CV_8UC4 → CV_8UC3
|
||||
rgba.release()
|
||||
val outBgr = extractDocument(bgr, quad, rotationDegrees, mask)
|
||||
bgr.release()
|
||||
val outBitmap = toBitmap(outBgr)
|
||||
outBgr.release()
|
||||
return outBitmap
|
||||
}
|
||||
|
||||
private fun toBitmap(mat: Mat): Bitmap {
|
||||
val outputBitmap = createBitmap(mat.cols(), mat.rows())
|
||||
Utils.matToBitmap(mat, outputBitmap)
|
||||
return outputBitmap
|
||||
fun toBitmap(bgr: Mat): Bitmap {
|
||||
require(bgr.type() == CvType.CV_8UC3)
|
||||
|
||||
val rgba = Mat()
|
||||
Imgproc.cvtColor(bgr, rgba, Imgproc.COLOR_BGR2RGBA)
|
||||
|
||||
val bmp = createBitmap(bgr.cols(), bgr.rows(), Bitmap.Config.ARGB_8888)
|
||||
Utils.matToBitmap(rgba, bmp)
|
||||
|
||||
rgba.release()
|
||||
return bmp
|
||||
}
|
||||
|
||||
|
||||
1
evaluation/.gitignore
vendored
1
evaluation/.gitignore
vendored
@@ -1,5 +1,6 @@
|
||||
/build
|
||||
/dataset/images
|
||||
/dataset/masks
|
||||
/dataset/metadata
|
||||
/python/venv
|
||||
/reports
|
||||
|
||||
@@ -0,0 +1,152 @@
|
||||
/*
|
||||
* Copyright 2025 Pierre-Yves Nicolas
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
* You should have received a copy of the GNU General Public License along with
|
||||
* this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.fairscan.evaluation
|
||||
|
||||
import org.fairscan.imageprocessing.detectDocumentQuad
|
||||
import org.fairscan.imageprocessing.extractDocument
|
||||
import org.fairscan.imageprocessing.isColoredDocument
|
||||
import org.fairscan.imageprocessing.scaledTo
|
||||
import org.opencv.core.Mat
|
||||
import org.opencv.imgcodecs.Imgcodecs
|
||||
import java.io.File
|
||||
|
||||
/**
 * Command-line entry point for the color detection evaluation.
 *
 * Calls `OpenCV.loadLocally()` first (presumably to make the native OpenCV
 * library available; confirm against the `nu.pattern` artifact), then runs
 * [ColorDetectionEvaluator].
 */
fun main() {
    nu.pattern.OpenCV.loadLocally()
    ColorDetectionEvaluator.run()
}
|
||||
|
||||
/**
 * Evaluates color/grayscale detection against the labelled dataset.
 *
 * Reads `images.csv` and `documents.csv` from `evaluation/dataset/metadata`,
 * processes every image for which an image file, a mask file and an expected
 * label exist, and writes the input image, the extracted document and an
 * `index.html` report to `evaluation/reports/color_detection`.
 */
object ColorDetectionEvaluator {

    fun run() {
        val root = File("evaluation")
        val datasetDir = File(root, "dataset")
        val metadataDir = File(datasetDir, "metadata")
        val outputDir = File("evaluation/reports/color_detection").apply { mkdirs() }

        val imageMetas = CsvMetadata.readImagesCsv(File(metadataDir, "images.csv"))
        val documentMetas = CsvMetadata.readDocumentsCsv(File(metadataDir, "documents.csv"))

        val results = imageMetas.mapNotNull { meta ->
            // Images whose document has no metadata entry are skipped.
            val expected = documentMetas[meta.docId]?.isColored ?: return@mapNotNull null
            evaluateImage(datasetDir, outputDir, meta.imgName.removeSuffix(".jpg"), expected)
        }

        ColorDetectionReport.writeHtml(
            File(outputDir, "index.html"),
            Score(results.groupingBy { it.colorCase }.eachCount()),
            results
        )
    }

    /**
     * Evaluates a single dataset image.
     *
     * @param datasetDir directory containing the `images` and `masks` sub-directories
     * @param outputDir directory receiving the `_input.jpg` / `_output.jpg` files
     * @param imgName image name without the `.jpg` suffix
     * @param expected expected "is colored" label for the image's document
     * @return the evaluation result, or `null` when the image or mask is missing or
     * unreadable, or when no document quad is detected
     */
    private fun evaluateImage(
        datasetDir: File,
        outputDir: File,
        imgName: String,
        expected: Boolean,
    ): ColorResult? {
        val imgFile = File(datasetDir, "images/$imgName.jpg")
        val maskFile = File(datasetDir, "masks/$imgName.png")
        if (!imgFile.exists() || !maskFile.exists()) return null

        val mat = Imgcodecs.imread(imgFile.absolutePath)
        var maskMat: Mat? = null
        var document: Mat? = null
        try {
            if (mat.empty()) return null

            val loadedMask = Imgcodecs.imread(maskFile.absolutePath, Imgcodecs.IMREAD_UNCHANGED)
            maskMat = loadedMask
            if (loadedMask.empty()) return null

            println("Processing ${imgName}...")

            val mask = MatMask(loadedMask)
            val quad = detectDocumentQuad(mask, isLiveAnalysis = false)
                ?.scaledTo(mask.width, mask.height, mat.width(), mat.height())
                ?: return null

            val extracted = extractDocument(mat, quad, 0, mask)
            document = extracted

            val detected = isColoredDocument(mat, mask, quad)

            val inputOut = File(outputDir, "${imgName}_input.jpg")
            Imgcodecs.imwrite(inputOut.absolutePath, mat)

            val outputOut = File(outputDir, "${imgName}_output.jpg")
            Imgcodecs.imwrite(outputOut.absolutePath, extracted)

            return ColorResult(
                imgName,
                originalFile = inputOut,
                documentFile = outputOut,
                colorCase = ColorCase(expected, detected),
            )
        } finally {
            // OpenCV Mats hold native memory; free them on every exit path so a
            // large dataset does not exhaust memory before the GC finalizes them.
            document?.release()
            maskMat?.release()
            mat.release()
        }
    }
}
|
||||
|
||||
/**
 * Outcome of color detection for one image: the expected label paired with
 * the label that was actually detected.
 *
 * @property expected whether the document is labelled as colored
 * @property detected whether the document was detected as colored
 */
data class ColorCase(
    val expected: Boolean,
    val detected: Boolean,
) {
    /** `true` when the detected label differs from the expected one. */
    val isMismatch: Boolean
        get() = expected xor detected
}
|
||||
|
||||
/**
 * Evaluation result for a single image.
 *
 * @property imgName image name (without extension, as used by the evaluator)
 * @property originalFile file the input image was written to
 * @property documentFile file the extracted document was written to
 * @property colorCase expected/detected color labels for this image
 */
data class ColorResult(
    val imgName: String,
    val originalFile: File,
    val documentFile: File,
    val colorCase: ColorCase,
)
|
||||
|
||||
/**
 * Aggregated evaluation score.
 *
 * @property byCase number of images observed for each expected/detected combination
 */
data class Score(
    val byCase: Map<ColorCase, Int>,
) {
    /** Total number of evaluated images. */
    val total: Int
        get() = byCase.values.sum()

    /** Number of images whose detected label differs from the expected one. */
    val mismatchCount: Int
        get() = byCase.filterKeys { it.isMismatch }.values.sum()

    /** Fraction of images detected correctly (not a number when [total] is 0). */
    val accuracy: Double
        get() = 1.0 - mismatchCount.toDouble() / total
}
|
||||
|
||||
/** Renders the color detection evaluation as a single HTML page. */
object ColorDetectionReport {

    /**
     * Writes the report to [output].
     *
     * The page contains the summary figures from [score], one line per
     * expected/detected combination, and, for each of the two mismatch
     * combinations, the input and extracted images of every matching result.
     * Images are referenced by file name only.
     */
    fun writeHtml(output: File, score: Score, results: List<ColorResult>) {
        val mismatchCases = listOf(ColorCase(true, false), ColorCase(false, true))

        val html = buildString {
            append("<html><body>")
            append("<h1>Color Detection Evaluation</h1>")
            append("<p>Total: ${score.total}</p>")
            append("<p>Mismatches: ${score.mismatchCount}</p>")
            append("<p>Accuracy: ${"%.2f".format(score.accuracy * 100)}%</p>")

            for ((case, count) in score.byCase) {
                append("<p>expectedColor=${case.expected} / detectedColor=${case.detected} : $count</p>")
            }

            mismatchCases.forEach { mismatch ->
                append("<h2>expectedColor=${mismatch.expected} / detectedColor=${mismatch.detected}</h2>")
                results
                    .filter { it.colorCase == mismatch }
                    .forEach { result -> append(imagePairHtml(result)) }
            }

            append("</body></html>")
        }

        output.writeText(html)
    }

    /** HTML fragment showing the input image next to the extracted document. */
    private fun imagePairHtml(r: ColorResult): String =
        """
        <div style="margin-bottom:20px;">
        <div style="display:flex; gap:20px;">
        <div><img width="300" src="${r.originalFile.name}" /></div>
        <div><img width="300" src="${r.documentFile.name}" /></div>
        </div>
        </div>
        """.trimIndent()
}
|
||||
|
||||
|
||||
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
* Copyright 2025 Pierre-Yves Nicolas
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
* You should have received a copy of the GNU General Public License along with
|
||||
* this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.fairscan.evaluation
|
||||
|
||||
import java.io.File
|
||||
|
||||
/**
 * One row of the images metadata CSV.
 *
 * @property imgName value of the first CSV column
 * @property docId value of the second CSV column; identifies the document shown in the image
 */
data class ImageMeta(
    val imgName: String,
    val docId: String,
)
|
||||
|
||||
/**
 * One row of the documents metadata CSV.
 *
 * @property docId value of the first CSV column
 * @property isColored `true` when the second CSV column equals "TRUE" (case-insensitive)
 */
data class DocumentMeta(
    val docId: String,
    val isColored: Boolean,
)
|
||||
|
||||
/**
 * Minimal readers for the evaluation metadata CSV files.
 *
 * Both files are expected to start with a header line followed by
 * comma-separated rows. Values are trimmed; quoting is not supported.
 */
object CsvMetadata {

    /**
     * Reads the images CSV: column 0 is the image name, column 1 the document id.
     *
     * @throws IllegalArgumentException if a non-blank row has fewer than two columns
     */
    fun readImagesCsv(file: File): List<ImageMeta> =
        readDataRows(file).map { cols ->
            ImageMeta(
                imgName = cols[0],
                docId = cols[1]
            )
        }

    /**
     * Reads the documents CSV into a map keyed by document id: column 0 is the
     * document id, column 1 is "TRUE" (case-insensitive) for colored documents.
     * When a document id appears several times, the last row wins.
     *
     * @throws IllegalArgumentException if a non-blank row has fewer than two columns
     */
    fun readDocumentsCsv(file: File): Map<String, DocumentMeta> =
        readDataRows(file)
            .map { cols ->
                DocumentMeta(
                    docId = cols[0],
                    isColored = cols[1].equals("TRUE", ignoreCase = true)
                )
            }
            .associateBy { it.docId }

    /**
     * Returns the trimmed columns of every data row, skipping the header and
     * blank lines (e.g. empty lines left at the end of a hand-edited file).
     */
    private fun readDataRows(file: File): List<List<String>> =
        file.readLines()
            .drop(1) // skip header
            .filter { it.isNotBlank() }
            .map { line ->
                val cols = line.split(',').map { it.trim() }
                require(cols.size >= 2) {
                    "Malformed row in ${file.name} (expected at least 2 columns): '$line'"
                }
                cols
            }
}
|
||||
@@ -31,7 +31,7 @@ class MatMask(private val mat: Mat) : Mask {
|
||||
override val width: Int get() = mat.width()
|
||||
override val height: Int get() = mat.height()
|
||||
|
||||
override fun toMat(): Mat = mat
|
||||
override fun toMat(): Mat = mat.clone()
|
||||
}
|
||||
|
||||
object DatasetEvaluator {
|
||||
@@ -71,7 +71,7 @@ object DatasetEvaluator {
|
||||
?.scaledTo(mask.width, mask.height, inputMat.width(), inputMat.height())
|
||||
|
||||
val corrected: Mat? = if (quad != null) {
|
||||
extractDocument(inputMat, quad = quad, rotationDegrees = 0)
|
||||
extractDocument(inputMat, quad = quad, rotationDegrees = 0, mask)
|
||||
} else null
|
||||
|
||||
val inputOut = File(outputDir, "${e.name}_input.jpg")
|
||||
|
||||
@@ -0,0 +1,236 @@
|
||||
/*
|
||||
* Copyright 2025 Pierre-Yves Nicolas
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation, either version 3 of the License, or (at your option)
|
||||
* any later version.
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
* You should have received a copy of the GNU General Public License along with
|
||||
* this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package org.fairscan.imageprocessing
|
||||
|
||||
import org.opencv.core.Core
|
||||
import org.opencv.core.CvType
|
||||
import org.opencv.core.CvType.CV_8UC1
|
||||
import org.opencv.core.Mat
|
||||
import org.opencv.core.Mat.zeros
|
||||
import org.opencv.core.MatOfPoint
|
||||
import org.opencv.core.Scalar
|
||||
import org.opencv.core.Size
|
||||
import org.opencv.imgproc.Imgproc
|
||||
import org.opencv.imgproc.Imgproc.fillConvexPoly
|
||||
import kotlin.math.roundToInt
|
||||
import kotlin.math.sqrt
|
||||
|
||||
/**
 * Decides whether the document visible in [img] contains a significant amount of color.
 *
 * Only pixels inside both the (eroded) segmentation [mask] and the [quad] are
 * considered. The image is white-balanced (gray world) on that region and
 * converted to Lab; a pixel counts as colored when its chroma exceeds
 * [chromaThreshold] and its luminance lies in [[luminanceMin], [luminanceMax]].
 *
 * @param img BGR image (8-bit, 3 channels) in which [quad] is expressed
 * @param mask segmentation mask of the document
 * @param quad document quadrilateral, in [img] coordinates
 * @param chromaThreshold minimum chroma, in OpenCV 8-bit Lab units (a/b centered on 128)
 * @param proportionThreshold minimum proportion of colored pixels among document pixels
 * @param luminanceMin lower luminance bound, on OpenCV's 8-bit L scale (0..255)
 * @param luminanceMax upper luminance bound, on OpenCV's 8-bit L scale (0..255)
 * @return `true` when the proportion of colored pixels is strictly above
 * [proportionThreshold]; `false` otherwise, including when the document mask is empty
 */
fun isColoredDocument(
    img: Mat,
    mask: Mask,
    quad: Quad,
    chromaThreshold: Double = 17.5,
    proportionThreshold: Double = 0.0003,
    luminanceMin: Double = 40.0,
    luminanceMax: Double = 180.0
): Boolean {

    // Work on a reasonable size (for correct performance)
    val resizedImg = resizeForMaxPixels(img, 1024.0 * 768.0)
    val workSize = resizedImg.size()

    // 1) Compute doc mask (mask ∩ quad)
    val docMask = documentMask(mask, quad, img.size(), workSize)

    // Nothing to analyse: bail out before any further processing, so that no
    // later step has to deal with an empty document mask.
    val totalPixels = Core.countNonZero(docMask)
    if (totalPixels == 0) {
        resizedImg.release()
        docMask.release()
        return false
    }

    // 2) Apply white balance only inside document
    val whiteBalanced = applyGrayWorldToDocument(resizedImg, docMask)

    // 3) Convert to Lab, see https://en.wikipedia.org/wiki/CIELAB_color_space
    val lab = Mat()
    Imgproc.cvtColor(whiteBalanced, lab, Imgproc.COLOR_BGR2Lab)

    // 4) Split Lab
    val channels = ArrayList<Mat>()
    Core.split(lab, channels)
    val luminance = channels[0]
    val a = channels[1]
    val b = channels[2]

    // 5) Compute chroma and keep pixels above the chroma threshold
    val chroma = chroma(a, b)

    val colorMask = Mat()
    Imgproc.threshold(chroma, colorMask, chromaThreshold, 255.0, Imgproc.THRESH_BINARY)
    colorMask.convertTo(colorMask, CvType.CV_8U)

    // 6) Create luminance mask L ∈ [luminanceMin, luminanceMax]
    val luminanceMask = Mat()
    Core.inRange(luminance, Scalar(luminanceMin), Scalar(luminanceMax), luminanceMask)

    // 7) Combine colorMask & luminanceMask & docMask
    val tmp = Mat()
    Core.bitwise_and(colorMask, luminanceMask, tmp)

    val restrictedMask = Mat()
    Core.bitwise_and(tmp, docMask, restrictedMask)

    val coloredPixels = Core.countNonZero(restrictedMask)

    // 8) Cleanup
    resizedImg.release()
    whiteBalanced.release()
    lab.release()
    channels.forEach { it.release() }
    chroma.release()
    colorMask.release()
    luminanceMask.release()
    tmp.release()
    restrictedMask.release()
    docMask.release()

    val proportion = coloredPixels.toDouble() / totalPixels.toDouble()
    return proportion > proportionThreshold
}
|
||||
|
||||
/**
 * Returns a copy of [img] containing at most about [maxPixels] pixels.
 *
 * Images that are already small enough are cloned unchanged; larger ones are
 * downscaled with the same factor on both axes using area interpolation.
 * The caller owns (and must release) the returned Mat in both cases.
 */
private fun resizeForMaxPixels(img: Mat, maxPixels: Double): Mat {
    val pixelCount = img.width() * img.height()
    if (pixelCount <= maxPixels) {
        return img.clone()
    }

    // Scaling both sides by sqrt(ratio) scales the area by ratio.
    val factor = sqrt(maxPixels / pixelCount)
    val targetSize = Size(img.width() * factor, img.height() * factor)
    return Mat().also { downscaled ->
        Imgproc.resize(img, downscaled, targetSize, 0.0, 0.0, Imgproc.INTER_AREA)
    }
}
|
||||
|
||||
/**
 * Computes the per-pixel chroma `sqrt((a - 128)² + (b - 128)²)` from the
 * a and b channels of a Lab image.
 *
 * The inputs are left untouched; the result is a new 32-bit float Mat owned
 * by the caller.
 */
private fun chroma(a: Mat, b: Mat): Mat {
    // Converts a channel to float and shifts it by -128.
    fun centered(channel: Mat): Mat {
        val asFloat = Mat()
        channel.convertTo(asFloat, CvType.CV_32F)
        val shifted = Mat()
        Core.subtract(asFloat, Scalar(128.0), shifted)
        asFloat.release()
        return shifted
    }

    val aCentered = centered(a)
    val bCentered = centered(b)

    val magnitude = Mat()
    Core.magnitude(aCentered, bCentered, magnitude)

    aCentered.release()
    bCentered.release()

    return magnitude
}
|
||||
|
||||
/**
 * Erodes [mask] with an elliptical kernel whose size depends on [quad].
 *
 * The kernel size is 2% of the shortest quad edge, clamped to 3..15 and
 * forced to be odd. [mask] is not modified; the eroded mask is returned as a
 * new Mat owned by the caller.
 */
private fun erodeBorder(mask: Mat, quad: Quad): Mat {
    val shortestEdge = quad.edges().minOf { it.norm() }
    val clamped = (shortestEdge * 0.02).roundToInt().coerceIn(3, 15)
    val kernelSize = if (clamped % 2 == 0) clamped + 1 else clamped

    val side = kernelSize.toDouble()
    val kernel = Imgproc.getStructuringElement(Imgproc.MORPH_ELLIPSE, Size(side, side))

    val eroded = Mat()
    Imgproc.morphologyEx(mask, eroded, Imgproc.MORPH_ERODE, kernel)
    kernel.release()

    return eroded
}
|
||||
|
||||
/**
 * Builds the mask of pixels considered part of the document: the eroded
 * segmentation mask intersected with the filled quadrilateral.
 *
 * @param mask segmentation mask; the Mat obtained from `toMat()` is released here
 * @param quad document quadrilateral, expressed in [origSize] coordinates
 * @param origSize size of the image in which [quad] is expressed
 * @param workSize size of the returned mask
 * @return a new Mat of size [workSize], owned by the caller
 */
private fun documentMask(
    mask: Mask,
    quad: Quad,
    origSize: Size,
    workSize: Size,
): Mat {
    // Bring the segmentation mask to the working resolution.
    val scaledMask = Mat()
    val sourceMask = mask.toMat()
    Imgproc.resize(sourceMask, scaledMask, workSize, 0.0, 0.0, Imgproc.INTER_AREA)

    // Bring the quad to the same resolution.
    val workQuad = quad.scaledTo(
        origSize.width, origSize.height, workSize.width, workSize.height
    )

    val eroded = erodeBorder(scaledMask, workQuad)

    // Rasterize the quad as a filled polygon.
    val quadMask = zeros(eroded.size(), CV_8UC1)
    val corners = listOf(workQuad.topLeft, workQuad.topRight, workQuad.bottomRight, workQuad.bottomLeft)
        .map { it.toCv() }
    val polygon = MatOfPoint(*corners.toTypedArray())
    fillConvexPoly(quadMask, polygon, Scalar(255.0))

    val intersection = Mat()
    Core.bitwise_and(eroded, quadMask, intersection)

    listOf(quadMask, polygon, eroded, scaledMask, sourceMask).forEach { it.release() }

    return intersection
}
|
||||
|
||||
/**
 * Applies a gray-world white balance to the pixels of [img] selected by [docMask].
 *
 * The per-channel means are computed over the masked pixels only, and each
 * channel is scaled so that its mean becomes the average of the three means.
 * Pixels outside the mask are copied unchanged.
 *
 * Neither [img] nor [docMask] is modified or released: both remain owned by
 * the caller.
 *
 * @param img 8-bit, 3-channel image (BGR order in the visible caller)
 * @param docMask mask selecting the document pixels (non-zero = selected)
 * @return a new Mat owned by the caller; a plain copy of [img] when [docMask] is empty
 * @throws IllegalArgumentException if [img] is not of type `CV_8UC3`
 */
fun applyGrayWorldToDocument(
    img: Mat,
    docMask: Mat,
): Mat {
    require(img.type() == CvType.CV_8UC3)

    // Empty mask: nothing to balance. The mask is left untouched because the
    // caller still owns it and uses it after this call.
    if (Core.countNonZero(docMask) == 0) {
        return img.clone()
    }

    // compute mean per channel on docMask (B,G,R)
    val meanScalar = Core.mean(img, docMask)

    // safety: avoid division by very small values
    val eps = 1e-6
    val meanB = meanScalar.`val`[0].coerceAtLeast(eps)
    val meanG = meanScalar.`val`[1].coerceAtLeast(eps)
    val meanR = meanScalar.`val`[2].coerceAtLeast(eps)

    val meanGray = (meanB + meanG + meanR) / 3.0

    // per-channel gains, in BGR order
    val scales = Scalar(meanGray / meanB, meanGray / meanG, meanGray / meanR)

    // scale the whole image in float to avoid intermediate clipping
    val imgF = Mat()
    img.convertTo(imgF, CvType.CV_32FC3)

    val scaledF = Mat()
    Core.multiply(imgF, scales, scaledF)

    // convert back to 8U (values are saturated to 0..255)
    val scaled8 = Mat()
    scaledF.convertTo(scaled8, CvType.CV_8UC3)

    // result = original copy, then copy scaled pixels where docMask != 0
    val result = img.clone()
    scaled8.copyTo(result, docMask)

    // cleanup of temporaries created here
    imgF.release()
    scaledF.release()
    scaled8.release()

    return result
}
|
||||
@@ -18,7 +18,6 @@ import org.fairscan.imageprocessing.quad.detectDocumentQuadFromProbmap
|
||||
import org.fairscan.imageprocessing.quad.findQuadFromRightAngles
|
||||
import org.fairscan.imageprocessing.quad.minAreaRect
|
||||
import org.opencv.core.Core
|
||||
import org.opencv.core.CvType
|
||||
import org.opencv.core.Mat
|
||||
import org.opencv.core.MatOfPoint
|
||||
import org.opencv.core.MatOfPoint2f
|
||||
@@ -62,10 +61,7 @@ fun detectDocumentQuad(mask: Mask, isLiveAnalysis: Boolean, minQuadAreaRatio: Do
|
||||
}
|
||||
|
||||
private fun biggestContour(mat: Mat): Pair<MatOfPoint2f?, Double> {
|
||||
val mat8u = Mat()
|
||||
mat.convertTo(mat8u, CvType.CV_8UC1, 255.0)
|
||||
|
||||
val refinedMask = refineMask(mat8u)
|
||||
val refinedMask = refineMask(mat)
|
||||
|
||||
val blurred = Mat()
|
||||
Imgproc.GaussianBlur(refinedMask, blurred, Size(5.0, 5.0), 0.0)
|
||||
@@ -116,7 +112,12 @@ fun refineMask(original: Mat): Mat {
|
||||
return opened
|
||||
}
|
||||
|
||||
fun extractDocument(inputMat: Mat, quad: Quad, rotationDegrees: Int): Mat {
|
||||
fun extractDocument(
|
||||
inputMat: Mat,
|
||||
quad: Quad,
|
||||
rotationDegrees: Int,
|
||||
mask: Mask,
|
||||
): Mat {
|
||||
val widthTop = norm(quad.topLeft, quad.topRight)
|
||||
val widthBottom = norm(quad.bottomLeft, quad.bottomRight)
|
||||
val targetWidth = (widthTop + widthBottom) / 2
|
||||
@@ -144,7 +145,8 @@ fun extractDocument(inputMat: Mat, quad: Quad, rotationDegrees: Int): Mat {
|
||||
Imgproc.warpPerspective(inputMat, outputMat, transform, outputSize)
|
||||
|
||||
val resized = resize(outputMat, 1500.0)
|
||||
val enhanced = enhanceCapturedImage(resized)
|
||||
val isColored = isColoredDocument(inputMat, mask, quad)
|
||||
val enhanced = enhanceCapturedImage(resized, isColored)
|
||||
val rotated = rotate(enhanced, rotationDegrees)
|
||||
|
||||
return rotated
|
||||
|
||||
@@ -81,9 +81,9 @@ fun createQuad(vertices: List<Point>): Quad {
|
||||
return Quad(sorted[0], sorted[1], sorted[2], sorted[3])
|
||||
}
|
||||
|
||||
fun Quad.scaledTo(fromWidth: Int, fromHeight: Int, toWidth: Int, toHeight: Int): Quad {
|
||||
val scaleX = toWidth.toFloat() / fromWidth
|
||||
val scaleY = toHeight.toFloat() / fromHeight
|
||||
fun Quad.scaledTo(fromWidth: Double, fromHeight: Double, toWidth: Double, toHeight: Double): Quad {
|
||||
val scaleX = toWidth / fromWidth
|
||||
val scaleY = toHeight / fromHeight
|
||||
return Quad(
|
||||
topLeft = topLeft.scaled(scaleX, scaleY),
|
||||
topRight = topRight.scaled(scaleX, scaleY),
|
||||
@@ -92,6 +92,14 @@ fun Quad.scaledTo(fromWidth: Int, fromHeight: Int, toWidth: Int, toHeight: Int):
|
||||
)
|
||||
}
|
||||
|
||||
fun Point.scaled(scaleX: Float, scaleY: Float): Point {
|
||||
fun Quad.scaledTo(fromWidth: Int, fromHeight: Int, toWidth: Int, toHeight: Int): Quad {
|
||||
return scaledTo(
|
||||
fromWidth.toDouble(),
|
||||
fromHeight.toDouble(),
|
||||
toWidth.toDouble(),
|
||||
toHeight.toDouble())
|
||||
}
|
||||
|
||||
fun Point.scaled(scaleX: Double, scaleY: Double): Point {
|
||||
return Point((x * scaleX), (y * scaleY))
|
||||
}
|
||||
|
||||
@@ -22,8 +22,8 @@ import org.opencv.core.Size
|
||||
import org.opencv.imgproc.Imgproc
|
||||
import kotlin.math.max
|
||||
|
||||
fun enhanceCapturedImage(img: Mat): Mat {
|
||||
return if (isColoredDocument(img)) {
|
||||
fun enhanceCapturedImage(img: Mat, isColored: Boolean): Mat {
|
||||
return if (isColored) {
|
||||
val result = Mat()
|
||||
Core.convertScaleAbs(img, result, 1.2, 10.0)
|
||||
result
|
||||
@@ -36,63 +36,6 @@ fun enhanceCapturedImage(img: Mat): Mat {
|
||||
}
|
||||
}
|
||||
|
||||
fun isColoredDocument(
|
||||
img: Mat,
|
||||
chromaThreshold: Double = 20.0,
|
||||
proportionThreshold: Double = 0.001
|
||||
): Boolean {
|
||||
val lab = Mat()
|
||||
Imgproc.cvtColor(img, lab, Imgproc.COLOR_BGR2Lab)
|
||||
|
||||
val channels = ArrayList<Mat>()
|
||||
Core.split(lab, channels)
|
||||
val a = channels[1]
|
||||
val b = channels[2]
|
||||
|
||||
val aFloat = Mat()
|
||||
val bFloat = Mat()
|
||||
a.convertTo(aFloat, CvType.CV_32F)
|
||||
b.convertTo(bFloat, CvType.CV_32F)
|
||||
|
||||
val aShifted = Mat()
|
||||
val bShifted = Mat()
|
||||
Core.subtract(aFloat, Scalar(128.0), aShifted)
|
||||
Core.subtract(bFloat, Scalar(128.0), bShifted)
|
||||
|
||||
val aSq = Mat()
|
||||
val bSq = Mat()
|
||||
Core.multiply(aShifted, aShifted, aSq)
|
||||
Core.multiply(bShifted, bShifted, bSq)
|
||||
|
||||
val sumSq = Mat()
|
||||
Core.add(aSq, bSq, sumSq)
|
||||
|
||||
val chroma = Mat()
|
||||
Core.sqrt(sumSq, chroma)
|
||||
|
||||
val mask = Mat()
|
||||
Imgproc.threshold(chroma, mask, chromaThreshold, 1.0, Imgproc.THRESH_BINARY)
|
||||
val coloredPixels = Core.countNonZero(mask)
|
||||
|
||||
val totalPixels = chroma.rows() * chroma.cols()
|
||||
val proportion = coloredPixels.toDouble() / totalPixels.toDouble()
|
||||
|
||||
lab.release()
|
||||
channels.forEach { it.release() }
|
||||
aFloat.release()
|
||||
bFloat.release()
|
||||
aShifted.release()
|
||||
bShifted.release()
|
||||
aSq.release()
|
||||
bSq.release()
|
||||
sumSq.release()
|
||||
chroma.release()
|
||||
mask.release()
|
||||
|
||||
return proportion > proportionThreshold
|
||||
}
|
||||
|
||||
|
||||
private fun multiScaleRetinex(img: Mat): Mat {
|
||||
val imageSize = img.size()
|
||||
val maxDim = max(imageSize.width, imageSize.height)
|
||||
|
||||
Reference in New Issue
Block a user