From fe91f3e241b1c31418de576233e68ce52ca5f8e3 Mon Sep 17 00:00:00 2001 From: pynicolas <6371790+pynicolas@users.noreply.github.com> Date: Wed, 10 Dec 2025 17:08:21 +0100 Subject: [PATCH] Improve distinction between color and grayscale documents (#79) Better differentiate color and grayscale documents: - Look for colored pixels only in the intersection of the mask and quadrilateral - Apply a white balance (grey world) to the document - Exclude pixels with extreme luminance - Erode segmentation mask --- .../fairscan/app/domain/ImageSegmentation.kt | 14 +- .../app/ui/screens/camera/CameraViewModel.kt | 41 ++- evaluation/.gitignore | 1 + .../evaluation/ColorDetectionEvaluator.kt | 152 +++++++++++ .../org/fairscan/evaluation/CsvMetadata.kt | 54 ++++ .../fairscan/evaluation/DatasetEvaluator.kt | 4 +- .../imageprocessing/ColorDetection.kt | 236 ++++++++++++++++++ .../imageprocessing/DocumentDetection.kt | 16 +- .../org/fairscan/imageprocessing/Geometry.kt | 16 +- .../imageprocessing/PostProcessing.kt | 61 +---- 10 files changed, 509 insertions(+), 86 deletions(-) create mode 100644 evaluation/src/main/java/org/fairscan/evaluation/ColorDetectionEvaluator.kt create mode 100644 evaluation/src/main/java/org/fairscan/evaluation/CsvMetadata.kt create mode 100644 imageprocessing/src/main/java/org/fairscan/imageprocessing/ColorDetection.kt diff --git a/app/src/main/java/org/fairscan/app/domain/ImageSegmentation.kt b/app/src/main/java/org/fairscan/app/domain/ImageSegmentation.kt index fec3804..dc090c1 100644 --- a/app/src/main/java/org/fairscan/app/domain/ImageSegmentation.kt +++ b/app/src/main/java/org/fairscan/app/domain/ImageSegmentation.kt @@ -151,9 +151,17 @@ class ImageSegmentationService(private val context: Context, private val logger: } override fun toMat(): Mat { - val mat = Mat(height, width, CvType.CV_32FC1) - mat.put(0, 0, probmap) - return mat + val threshold = 0.5f + + val mask = Mat(height, width, CvType.CV_8UC1) + val data = ByteArray(width * height) + + for (i in probmap.indices) { + data[i] = if (probmap[i] >= threshold) 255.toByte() else 0.toByte() + } + + mask.put(0, 0, data) + return mask } } diff --git a/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraViewModel.kt b/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraViewModel.kt index c02aa30..cd9f9c5 100644 --- a/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraViewModel.kt +++ b/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraViewModel.kt @@ -31,12 +31,15 @@ import kotlinx.coroutines.flow.map import kotlinx.coroutines.launch import kotlinx.coroutines.withContext import org.fairscan.app.AppContainer +import org.fairscan.imageprocessing.Mask import org.fairscan.imageprocessing.Quad import org.fairscan.imageprocessing.detectDocumentQuad import org.fairscan.imageprocessing.extractDocument import org.fairscan.imageprocessing.scaledTo import org.opencv.android.Utils +import org.opencv.core.CvType import org.opencv.core.Mat +import org.opencv.imgproc.Imgproc import java.io.ByteArrayOutputStream sealed interface CameraEvent { @@ -130,6 +133,7 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() { if (segmentation != null) { val mask = segmentation.segmentation var quad = detectDocumentQuad(mask, isLiveAnalysis = false) + val rotationDegrees = imageProxy.imageInfo.rotationDegrees if (quad == null) { val now = System.currentTimeMillis() lastSuccessfulLiveAnalysisState?.timestamp?.let { @@ -139,7 +143,7 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() { val recentLive = lastSuccessfulLiveAnalysisState?.takeIf { now - it.timestamp <= 1500 } - val rotations = (-imageProxy.imageInfo.rotationDegrees / 90) + 4 + val rotations = (-rotationDegrees / 90) + 4 quad = recentLive?.documentQuad?.rotate90(rotations, mask.width, mask.height) if (quad != null) { Log.i("Quad", "Using quad taken in live analysis; rotations=$rotations") @@ -147,7 +151,7 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() { } if (quad != null) { val resizedQuad = quad.scaledTo(mask.width, mask.height, bitmap.width, bitmap.height) - corrected = extractDocumentFromBitmap(bitmap, resizedQuad, imageProxy.imageInfo.rotationDegrees) + corrected = extractDocumentFromBitmap(bitmap, resizedQuad, rotationDegrees, mask) } } return@withContext corrected @@ -180,18 +184,33 @@ sealed class CaptureState { data class CaptureError(override val frozenImage: Bitmap) : CaptureState() data class CapturePreview( override val frozenImage: Bitmap, - val processed: Bitmap + val processed: Bitmap, ) : CaptureState() } -fun extractDocumentFromBitmap(originalBitmap: Bitmap, quad: Quad, rotationDegrees: Int): Bitmap { - val inputMat = Mat() - Utils.bitmapToMat(originalBitmap, inputMat) - return toBitmap(extractDocument(inputMat, quad, rotationDegrees)) +fun extractDocumentFromBitmap(image: Bitmap, quad: Quad, rotationDegrees: Int, mask: Mask): Bitmap { + val rgba = Mat() + Utils.bitmapToMat(image, rgba) + val bgr = Mat() + Imgproc.cvtColor(rgba, bgr, Imgproc.COLOR_RGBA2BGR) // CV_8UC4 → CV_8UC3 + rgba.release() + val outBgr = extractDocument(bgr, quad, rotationDegrees, mask) + bgr.release() + val outBitmap = toBitmap(outBgr) + outBgr.release() + return outBitmap } -private fun toBitmap(mat: Mat): Bitmap { - val outputBitmap = createBitmap(mat.cols(), mat.rows()) - Utils.matToBitmap(mat, outputBitmap) - return outputBitmap +fun toBitmap(bgr: Mat): Bitmap { + require(bgr.type() == CvType.CV_8UC3) + + val rgba = Mat() + Imgproc.cvtColor(bgr, rgba, Imgproc.COLOR_BGR2RGBA) + + val bmp = createBitmap(bgr.cols(), bgr.rows(), Bitmap.Config.ARGB_8888) + Utils.matToBitmap(rgba, bmp) + + rgba.release() + return bmp } + diff --git a/evaluation/.gitignore b/evaluation/.gitignore index 6a6dc85..02c5224 100644 --- a/evaluation/.gitignore +++ b/evaluation/.gitignore @@ -1,5 +1,6 @@ /build /dataset/images /dataset/masks +/dataset/metadata /python/venv /reports diff --git a/evaluation/src/main/java/org/fairscan/evaluation/ColorDetectionEvaluator.kt b/evaluation/src/main/java/org/fairscan/evaluation/ColorDetectionEvaluator.kt new file mode 100644 index 0000000..b7c8a3c --- /dev/null +++ b/evaluation/src/main/java/org/fairscan/evaluation/ColorDetectionEvaluator.kt @@ -0,0 +1,152 @@ +/* + * Copyright 2025 Pierre-Yves Nicolas + * + * This program is free software: you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ +package org.fairscan.evaluation + +import org.fairscan.imageprocessing.detectDocumentQuad +import org.fairscan.imageprocessing.extractDocument +import org.fairscan.imageprocessing.isColoredDocument +import org.fairscan.imageprocessing.scaledTo +import org.opencv.core.Mat +import org.opencv.imgcodecs.Imgcodecs +import java.io.File + +fun main() { + nu.pattern.OpenCV.loadLocally() + ColorDetectionEvaluator.run() +} + +object ColorDetectionEvaluator { + + fun run() { + val root = File("evaluation") + val datasetDir = File(root, "dataset") + val metadataDir = File(datasetDir, "metadata") + val outputDir = File("evaluation/reports/color_detection").apply { mkdirs() } + + val imageMetas = CsvMetadata.readImagesCsv(File(metadataDir, "images.csv")) + val documentMetas = CsvMetadata.readDocumentsCsv(File(metadataDir, "documents.csv")) + + val results = mutableListOf() + var nbProcessedImages = 0 + + for (meta in imageMetas) { + val expected = documentMetas[meta.docId]?.isColored ?: continue + val imgName = meta.imgName.removeSuffix(".jpg") + val imgFile = File(datasetDir, "images/$imgName.jpg") + val maskFile = File(datasetDir, "masks/$imgName.png") + if (!imgFile.exists() || !maskFile.exists()) continue + + val mat = Imgcodecs.imread(imgFile.absolutePath) + if (mat.empty()) continue + + val maskMat = Imgcodecs.imread(maskFile.absolutePath, Imgcodecs.IMREAD_UNCHANGED) + if (maskMat.empty()) continue + + println("Processing ${imgName}...") + + val mask = MatMask(maskMat) + + val quad = detectDocumentQuad(mask, isLiveAnalysis = false) + ?.scaledTo(mask.width, mask.height, mat.width(), mat.height()) + + val document: Mat = if (quad != null) { + extractDocument(mat, quad, 0, mask) + } else continue + + val detected = isColoredDocument(mat, mask, quad) + + nbProcessedImages++ + + val inputOut = File(outputDir, "${imgName}_input.jpg") + Imgcodecs.imwrite(inputOut.absolutePath, mat) + + val outputOut = File(outputDir, "${imgName}_output.jpg") + Imgcodecs.imwrite(outputOut.absolutePath, document) + + results += ColorResult( + imgName, + originalFile = inputOut, + documentFile = outputOut, + colorCase = ColorCase(expected, detected), + ) + } + + ColorDetectionReport.writeHtml( + File(outputDir, "index.html"), + Score(results.groupingBy { it.colorCase }.eachCount()), + results + ) + } +} + +data class ColorCase( + val expected: Boolean, + val detected: Boolean +) { + val isMismatch: Boolean get() = expected != detected +} + +data class ColorResult( + val imgName: String, + val originalFile: File, + val documentFile: File, + val colorCase: ColorCase +) + +data class Score( + val byCase: Map +) { + val total: Int get() = byCase.values.sum() + val mismatchCount: Int get() = byCase.filter { it.key.isMismatch }.values.sum() + val accuracy: Double get() = 1.0 - mismatchCount.toDouble() / total +} + +object ColorDetectionReport { + + fun writeHtml(output: File, score: Score, results: List) { + val sb = StringBuilder() + + sb.append("") + sb.append("

Color Detection Evaluation

") + sb.append("

Total: ${score.total}

") + sb.append("

Mismatches: ${score.mismatchCount}

") + sb.append("

Accuracy: ${"%.2f".format(score.accuracy * 100)}%

") + + score.byCase.forEach { (case, count) -> + sb.append("

expectedColor=${case.expected} / detectedColor=${case.detected} : $count

") + } + + for (c in listOf(ColorCase(true, false), ColorCase(false, true))) { + sb.append("

expectedColor=${c.expected} / detectedColor=${c.detected}

") + for (r in results.filter { it.colorCase == c }) { + sb.append( + """ +
+
+
+
+
+
+ """.trimIndent() + ) + } + } + + sb.append("") + output.writeText(sb.toString()) + } +} + + diff --git a/evaluation/src/main/java/org/fairscan/evaluation/CsvMetadata.kt b/evaluation/src/main/java/org/fairscan/evaluation/CsvMetadata.kt new file mode 100644 index 0000000..44a7ea3 --- /dev/null +++ b/evaluation/src/main/java/org/fairscan/evaluation/CsvMetadata.kt @@ -0,0 +1,54 @@ +/* + * Copyright 2025 Pierre-Yves Nicolas + * + * This program is free software: you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ +package org.fairscan.evaluation + +import java.io.File + +data class ImageMeta( + val imgName: String, + val docId: String +) + +data class DocumentMeta( + val docId: String, + val isColored: Boolean +) + +object CsvMetadata { + + fun readImagesCsv(file: File): List { + return file.readLines() + .drop(1) // skip header + .map { line -> + val cols = line.split(',') + ImageMeta( + imgName = cols[0].trim(), + docId = cols[1].trim() + ) + } + } + + fun readDocumentsCsv(file: File): Map { + return file.readLines() + .drop(1) + .map { line -> + val cols = line.split(',') + val docId = cols[0].trim() + val isColored = cols[1].trim().equals("TRUE", ignoreCase = true) + DocumentMeta(docId, isColored) + } + .associateBy { it.docId } + } +} diff --git a/evaluation/src/main/java/org/fairscan/evaluation/DatasetEvaluator.kt b/evaluation/src/main/java/org/fairscan/evaluation/DatasetEvaluator.kt index 511f972..5f2f42c 100644 --- a/evaluation/src/main/java/org/fairscan/evaluation/DatasetEvaluator.kt +++ b/evaluation/src/main/java/org/fairscan/evaluation/DatasetEvaluator.kt @@ -31,7 +31,7 @@ class MatMask(private val mat: Mat) : Mask { override val width: Int get() = mat.width() override val height: Int get() = mat.height() - override fun toMat(): Mat = mat + override fun toMat(): Mat = mat.clone() } object DatasetEvaluator { @@ -71,7 +71,7 @@ object DatasetEvaluator { ?.scaledTo(mask.width, mask.height, inputMat.width(), inputMat.height()) val corrected: Mat? = if (quad != null) { - extractDocument(inputMat, quad = quad, rotationDegrees = 0) + extractDocument(inputMat, quad = quad, rotationDegrees = 0, mask) } else null val inputOut = File(outputDir, "${e.name}_input.jpg") diff --git a/imageprocessing/src/main/java/org/fairscan/imageprocessing/ColorDetection.kt b/imageprocessing/src/main/java/org/fairscan/imageprocessing/ColorDetection.kt new file mode 100644 index 0000000..511393d --- /dev/null +++ b/imageprocessing/src/main/java/org/fairscan/imageprocessing/ColorDetection.kt @@ -0,0 +1,236 @@ +/* + * Copyright 2025 Pierre-Yves Nicolas + * + * This program is free software: you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ +package org.fairscan.imageprocessing + +import org.opencv.core.Core +import org.opencv.core.CvType +import org.opencv.core.CvType.CV_8UC1 +import org.opencv.core.Mat +import org.opencv.core.Mat.zeros +import org.opencv.core.MatOfPoint +import org.opencv.core.Scalar +import org.opencv.core.Size +import org.opencv.imgproc.Imgproc +import org.opencv.imgproc.Imgproc.fillConvexPoly +import kotlin.math.roundToInt +import kotlin.math.sqrt + +fun isColoredDocument( + img: Mat, + mask: Mask, + quad: Quad, + chromaThreshold: Double = 17.5, + proportionThreshold: Double = 0.0003, + luminanceMin: Double = 40.0, + luminanceMax: Double = 180.0 +): Boolean { + + // Work on a reasonable size (for correct performance) + val resizedImg = resizeForMaxPixels(img, 1024.0 * 768.0) + val workSize = resizedImg.size() + + // 1) Compute doc mask (mask ∩ quad) + val docMask = documentMask(mask, quad, img.size(), workSize) + + // 2) Apply white balance only inside document + val whiteBalanced = applyGrayWorldToDocument(resizedImg, docMask) + + // 3) Convert to Lab, see https://en.wikipedia.org/wiki/CIELAB_color_space + val lab = Mat() + Imgproc.cvtColor(whiteBalanced, lab, Imgproc.COLOR_BGR2Lab) + + // 4) Split Lab + val channels = ArrayList() + Core.split(lab, channels) + val luminance = channels[0] + val a = channels[1] + val b = channels[2] + + // 5) Compute chroma + val chroma = chroma(a, b) + + val colorMask = Mat() + Imgproc.threshold(chroma, colorMask, chromaThreshold, 255.0, Imgproc.THRESH_BINARY) + colorMask.convertTo(colorMask, CvType.CV_8U) + + // 6) Create luminance mask L ∈ [luminanceMin, luminanceMax] + val luminanceMask = Mat() + Core.inRange(luminance, Scalar(luminanceMin), Scalar(luminanceMax), luminanceMask) + + // 7) Combine colorMask & luminanceMask & docMask + val tmp = Mat() + Core.bitwise_and(colorMask, luminanceMask, tmp) + + val restrictedMask = Mat() + Core.bitwise_and(tmp, docMask, restrictedMask) + + val coloredPixels = Core.countNonZero(restrictedMask) + val totalPixels = Core.countNonZero(docMask) + + // 8) Cleanup + resizedImg.release() + whiteBalanced.release() + lab.release() + channels.forEach { it.release() } + chroma.release() + colorMask.release() + luminanceMask.release() + tmp.release() + restrictedMask.release() + docMask.release() + + if (totalPixels == 0) return false + + val proportion = coloredPixels.toDouble() / totalPixels.toDouble() + return proportion > proportionThreshold +} + +private fun resizeForMaxPixels(img: Mat, maxPixels: Double): Mat { + val origPixels = img.width() * img.height() + if (origPixels <= maxPixels) { + return img.clone() + } + val scale = sqrt(maxPixels / origPixels) + val size = Size(img.width() * scale, img.height() * scale) + val resizedImg = Mat() + Imgproc.resize(img, resizedImg, size, 0.0, 0.0, Imgproc.INTER_AREA) + return resizedImg +} + +private fun chroma(a: Mat, b: Mat): Mat { + val aFloat = Mat() + val bFloat = Mat() + a.convertTo(aFloat, CvType.CV_32F) + b.convertTo(bFloat, CvType.CV_32F) + + val aShifted = Mat() + val bShifted = Mat() + Core.subtract(aFloat, Scalar(128.0), aShifted) + Core.subtract(bFloat, Scalar(128.0), bShifted) + + val chroma = Mat() + Core.magnitude(aShifted, bShifted, chroma) + + aFloat.release() + bFloat.release() + aShifted.release() + bShifted.release() + + return chroma +} + +private fun erodeBorder(mask: Mat, quad: Quad): Mat { + val minDim = quad.edges().minOf { it.norm() } + var k = (minDim * 0.02).roundToInt() + k = k.coerceIn(3, 15) + if (k % 2 == 0) k += 1 + + val kernel = Imgproc.getStructuringElement( + Imgproc.MORPH_ELLIPSE, + Size(k.toDouble(), k.toDouble()) + ) + val erodedMask = Mat() + Imgproc.morphologyEx(mask, erodedMask, Imgproc.MORPH_ERODE, kernel) + kernel.release() + return erodedMask +} + +private fun documentMask( + mask: Mask, + quad: Quad, + origSize: Size, + workSize: Size, +): Mat { + val resizedMask = Mat() + val maskMat = mask.toMat() + Imgproc.resize(maskMat, resizedMask, workSize, 0.0, 0.0, Imgproc.INTER_AREA) + val resizedQuad = quad.scaledTo( + origSize.width, origSize.height, workSize.width, workSize.height + ) + val erodedMask = erodeBorder(resizedMask, resizedQuad) + val quadMask = zeros(erodedMask.size(), CV_8UC1) + val pts = MatOfPoint( + resizedQuad.topLeft.toCv(), resizedQuad.topRight.toCv(), resizedQuad.bottomRight.toCv(), resizedQuad.bottomLeft.toCv()) + fillConvexPoly(quadMask, pts, Scalar(255.0)) + + val docMask = Mat() + Core.bitwise_and(erodedMask, quadMask, docMask) + + quadMask.release() + pts.release() + erodedMask.release() + resizedMask.release() + maskMat.release() + + return docMask +} + +fun applyGrayWorldToDocument( + img: Mat, + docMask: Mat, +): Mat { + require(img.type() == CvType.CV_8UC3) + + val nonZero = Core.countNonZero(docMask) + if (nonZero == 0) { + docMask.release() + return img.clone() + } + + // compute mean per channel on docMask (B,G,R) + val meanScalar = Core.mean(img, docMask) // Scalar(bMean, gMean, rMean, alpha) + val meanB = meanScalar.`val`[0] + val meanG = meanScalar.`val`[1] + val meanR = meanScalar.`val`[2] + + // safety: avoid division by very small values + val eps = 1e-6 + val meanBsafe = if (meanB < eps) eps else meanB + val meanGsafe = if (meanG < eps) eps else meanG + val meanRsafe = if (meanR < eps) eps else meanR + + val meanGray = (meanBsafe + meanGsafe + meanRsafe) / 3.0 + + val scaleB = meanGray / meanBsafe + val scaleG = meanGray / meanGsafe + val scaleR = meanGray / meanRsafe + + // apply per-channel scaling only on docMask + // convert to float + val imgF = Mat() + img.convertTo(imgF, CvType.CV_32FC3) + + // build scales scalar in BGR order + val scales = Scalar(scaleB, scaleG, scaleR) + + // prepare scaled full image (float) + val scaledF = Mat() + Core.multiply(imgF, scales, scaledF) + + // convert scaledF back to 8U + val scaled8 = Mat() + scaledF.convertTo(scaled8, CvType.CV_8UC3) + + // result = original copy, then copy scaled pixels where docMask != 0 + val result = img.clone() + scaled8.copyTo(result, docMask) + + // cleanup + imgF.release() + scaledF.release() + scaled8.release() + + return result +} diff --git a/imageprocessing/src/main/java/org/fairscan/imageprocessing/DocumentDetection.kt b/imageprocessing/src/main/java/org/fairscan/imageprocessing/DocumentDetection.kt index 6078447..996eb02 100644 --- a/imageprocessing/src/main/java/org/fairscan/imageprocessing/DocumentDetection.kt +++ b/imageprocessing/src/main/java/org/fairscan/imageprocessing/DocumentDetection.kt @@ -18,7 +18,6 @@ import org.fairscan.imageprocessing.quad.detectDocumentQuadFromProbmap import org.fairscan.imageprocessing.quad.findQuadFromRightAngles import org.fairscan.imageprocessing.quad.minAreaRect import org.opencv.core.Core -import org.opencv.core.CvType import org.opencv.core.Mat import org.opencv.core.MatOfPoint import org.opencv.core.MatOfPoint2f @@ -62,10 +61,7 @@ fun detectDocumentQuad(mask: Mask, isLiveAnalysis: Boolean, minQuadAreaRatio: Do } private fun biggestContour(mat: Mat): Pair { - val mat8u = Mat() - mat.convertTo(mat8u, CvType.CV_8UC1, 255.0) - - val refinedMask = refineMask(mat8u) + val refinedMask = refineMask(mat) val blurred = Mat() Imgproc.GaussianBlur(refinedMask, blurred, Size(5.0, 5.0), 0.0) @@ -116,7 +112,12 @@ fun refineMask(original: Mat): Mat { return opened } -fun extractDocument(inputMat: Mat, quad: Quad, rotationDegrees: Int): Mat { +fun extractDocument( + inputMat: Mat, + quad: Quad, + rotationDegrees: Int, + mask: Mask, +): Mat { val widthTop = norm(quad.topLeft, quad.topRight) val widthBottom = norm(quad.bottomLeft, quad.bottomRight) val targetWidth = (widthTop + widthBottom) / 2 @@ -144,7 +145,8 @@ fun extractDocument(inputMat: Mat, quad: Quad, rotationDegrees: Int): Mat { Imgproc.warpPerspective(inputMat, outputMat, transform, outputSize) val resized = resize(outputMat, 1500.0) - val enhanced = enhanceCapturedImage(resized) + val isColored = isColoredDocument(inputMat, mask, quad) + val enhanced = enhanceCapturedImage(resized, isColored) val rotated = rotate(enhanced, rotationDegrees) return rotated diff --git a/imageprocessing/src/main/java/org/fairscan/imageprocessing/Geometry.kt b/imageprocessing/src/main/java/org/fairscan/imageprocessing/Geometry.kt index ebd8aa9..f0af04b 100644 --- a/imageprocessing/src/main/java/org/fairscan/imageprocessing/Geometry.kt +++ b/imageprocessing/src/main/java/org/fairscan/imageprocessing/Geometry.kt @@ -81,9 +81,9 @@ fun createQuad(vertices: List): Quad { return Quad(sorted[0], sorted[1], sorted[2], sorted[3]) } -fun Quad.scaledTo(fromWidth: Int, fromHeight: Int, toWidth: Int, toHeight: Int): Quad { - val scaleX = toWidth.toFloat() / fromWidth - val scaleY = toHeight.toFloat() / fromHeight +fun Quad.scaledTo(fromWidth: Double, fromHeight: Double, toWidth: Double, toHeight: Double): Quad { + val scaleX = toWidth / fromWidth + val scaleY = toHeight / fromHeight return Quad( topLeft = topLeft.scaled(scaleX, scaleY), topRight = topRight.scaled(scaleX, scaleY), @@ -92,6 +92,14 @@ fun Quad.scaledTo(fromWidth: Int, fromHeight: Int, toWidth: Int, toHeight: Int): ) } -fun Point.scaled(scaleX: Float, scaleY: Float): Point { +fun Quad.scaledTo(fromWidth: Int, fromHeight: Int, toWidth: Int, toHeight: Int): Quad { + return scaledTo( + fromWidth.toDouble(), + fromHeight.toDouble(), + toWidth.toDouble(), + toHeight.toDouble()) +} + +fun Point.scaled(scaleX: Double, scaleY: Double): Point { return Point((x * scaleX), (y * scaleY)) } diff --git a/imageprocessing/src/main/java/org/fairscan/imageprocessing/PostProcessing.kt b/imageprocessing/src/main/java/org/fairscan/imageprocessing/PostProcessing.kt index da09e42..325afc5 100644 --- a/imageprocessing/src/main/java/org/fairscan/imageprocessing/PostProcessing.kt +++ b/imageprocessing/src/main/java/org/fairscan/imageprocessing/PostProcessing.kt @@ -22,8 +22,8 @@ import org.opencv.core.Size import org.opencv.imgproc.Imgproc import kotlin.math.max -fun enhanceCapturedImage(img: Mat): Mat { - return if (isColoredDocument(img)) { +fun enhanceCapturedImage(img: Mat, isColored: Boolean): Mat { + return if (isColored) { val result = Mat() Core.convertScaleAbs(img, result, 1.2, 10.0) result @@ -36,63 +36,6 @@ fun enhanceCapturedImage(img: Mat): Mat { } } -fun isColoredDocument( - img: Mat, - chromaThreshold: Double = 20.0, - proportionThreshold: Double = 0.001 -): Boolean { - val lab = Mat() - Imgproc.cvtColor(img, lab, Imgproc.COLOR_BGR2Lab) - - val channels = ArrayList() - Core.split(lab, channels) - val a = channels[1] - val b = channels[2] - - val aFloat = Mat() - val bFloat = Mat() - a.convertTo(aFloat, CvType.CV_32F) - b.convertTo(bFloat, CvType.CV_32F) - - val aShifted = Mat() - val bShifted = Mat() - Core.subtract(aFloat, Scalar(128.0), aShifted) - Core.subtract(bFloat, Scalar(128.0), bShifted) - - val aSq = Mat() - val bSq = Mat() - Core.multiply(aShifted, aShifted, aSq) - Core.multiply(bShifted, bShifted, bSq) - - val sumSq = Mat() - Core.add(aSq, bSq, sumSq) - - val chroma = Mat() - Core.sqrt(sumSq, chroma) - - val mask = Mat() - Imgproc.threshold(chroma, mask, chromaThreshold, 1.0, Imgproc.THRESH_BINARY) - val coloredPixels = Core.countNonZero(mask) - - val totalPixels = chroma.rows() * chroma.cols() - val proportion = coloredPixels.toDouble() / totalPixels.toDouble() - - lab.release() - channels.forEach { it.release() } - aFloat.release() - bFloat.release() - aShifted.release() - bShifted.release() - aSq.release() - bSq.release() - sumSq.release() - chroma.release() - mask.release() - - return proportion > proportionThreshold -} - - private fun multiScaleRetinex(img: Mat): Mat { val imageSize = img.size() val maxDim = max(imageSize.width, imageSize.height)