Improve distinction between color and grayscale documents (#79)

Better differentiate color and grayscale documents:
- Look for colored pixels only in the intersection of the mask and quadrilateral
- Apply a white balance (grey world) to the document
- Exclude pixels with extreme luminance
- Erode segmentation mask
This commit is contained in:
pynicolas
2025-12-10 17:08:21 +01:00
committed by GitHub
parent 87433fa96a
commit fe91f3e241
10 changed files with 509 additions and 86 deletions

View File

@@ -151,9 +151,17 @@ class ImageSegmentationService(private val context: Context, private val logger:
} }
override fun toMat(): Mat { override fun toMat(): Mat {
val mat = Mat(height, width, CvType.CV_32FC1) val threshold = 0.5f
mat.put(0, 0, probmap)
return mat val mask = Mat(height, width, CvType.CV_8UC1)
val data = ByteArray(width * height)
for (i in probmap.indices) {
data[i] = if (probmap[i] >= threshold) 255.toByte() else 0.toByte()
}
mask.put(0, 0, data)
return mask
} }
} }

View File

@@ -31,12 +31,15 @@ import kotlinx.coroutines.flow.map
import kotlinx.coroutines.launch import kotlinx.coroutines.launch
import kotlinx.coroutines.withContext import kotlinx.coroutines.withContext
import org.fairscan.app.AppContainer import org.fairscan.app.AppContainer
import org.fairscan.imageprocessing.Mask
import org.fairscan.imageprocessing.Quad import org.fairscan.imageprocessing.Quad
import org.fairscan.imageprocessing.detectDocumentQuad import org.fairscan.imageprocessing.detectDocumentQuad
import org.fairscan.imageprocessing.extractDocument import org.fairscan.imageprocessing.extractDocument
import org.fairscan.imageprocessing.scaledTo import org.fairscan.imageprocessing.scaledTo
import org.opencv.android.Utils import org.opencv.android.Utils
import org.opencv.core.CvType
import org.opencv.core.Mat import org.opencv.core.Mat
import org.opencv.imgproc.Imgproc
import java.io.ByteArrayOutputStream import java.io.ByteArrayOutputStream
sealed interface CameraEvent { sealed interface CameraEvent {
@@ -130,6 +133,7 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
if (segmentation != null) { if (segmentation != null) {
val mask = segmentation.segmentation val mask = segmentation.segmentation
var quad = detectDocumentQuad(mask, isLiveAnalysis = false) var quad = detectDocumentQuad(mask, isLiveAnalysis = false)
val rotationDegrees = imageProxy.imageInfo.rotationDegrees
if (quad == null) { if (quad == null) {
val now = System.currentTimeMillis() val now = System.currentTimeMillis()
lastSuccessfulLiveAnalysisState?.timestamp?.let { lastSuccessfulLiveAnalysisState?.timestamp?.let {
@@ -139,7 +143,7 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
val recentLive = lastSuccessfulLiveAnalysisState?.takeIf { val recentLive = lastSuccessfulLiveAnalysisState?.takeIf {
now - it.timestamp <= 1500 now - it.timestamp <= 1500
} }
val rotations = (-imageProxy.imageInfo.rotationDegrees / 90) + 4 val rotations = (-rotationDegrees / 90) + 4
quad = recentLive?.documentQuad?.rotate90(rotations, mask.width, mask.height) quad = recentLive?.documentQuad?.rotate90(rotations, mask.width, mask.height)
if (quad != null) { if (quad != null) {
Log.i("Quad", "Using quad taken in live analysis; rotations=$rotations") Log.i("Quad", "Using quad taken in live analysis; rotations=$rotations")
@@ -147,7 +151,7 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
} }
if (quad != null) { if (quad != null) {
val resizedQuad = quad.scaledTo(mask.width, mask.height, bitmap.width, bitmap.height) val resizedQuad = quad.scaledTo(mask.width, mask.height, bitmap.width, bitmap.height)
corrected = extractDocumentFromBitmap(bitmap, resizedQuad, imageProxy.imageInfo.rotationDegrees) corrected = extractDocumentFromBitmap(bitmap, resizedQuad, rotationDegrees, mask)
} }
} }
return@withContext corrected return@withContext corrected
@@ -180,18 +184,33 @@ sealed class CaptureState {
data class CaptureError(override val frozenImage: Bitmap) : CaptureState() data class CaptureError(override val frozenImage: Bitmap) : CaptureState()
data class CapturePreview( data class CapturePreview(
override val frozenImage: Bitmap, override val frozenImage: Bitmap,
val processed: Bitmap val processed: Bitmap,
) : CaptureState() ) : CaptureState()
} }
fun extractDocumentFromBitmap(originalBitmap: Bitmap, quad: Quad, rotationDegrees: Int): Bitmap { fun extractDocumentFromBitmap(image: Bitmap, quad: Quad, rotationDegrees: Int, mask: Mask): Bitmap {
val inputMat = Mat() val rgba = Mat()
Utils.bitmapToMat(originalBitmap, inputMat) Utils.bitmapToMat(image, rgba)
return toBitmap(extractDocument(inputMat, quad, rotationDegrees)) val bgr = Mat()
Imgproc.cvtColor(rgba, bgr, Imgproc.COLOR_RGBA2BGR) // CV_8UC4 → CV_8UC3
rgba.release()
val outBgr = extractDocument(bgr, quad, rotationDegrees, mask)
bgr.release()
val outBitmap = toBitmap(outBgr)
outBgr.release()
return outBitmap
} }
private fun toBitmap(mat: Mat): Bitmap { fun toBitmap(bgr: Mat): Bitmap {
val outputBitmap = createBitmap(mat.cols(), mat.rows()) require(bgr.type() == CvType.CV_8UC3)
Utils.matToBitmap(mat, outputBitmap)
return outputBitmap val rgba = Mat()
Imgproc.cvtColor(bgr, rgba, Imgproc.COLOR_BGR2RGBA)
val bmp = createBitmap(bgr.cols(), bgr.rows(), Bitmap.Config.ARGB_8888)
Utils.matToBitmap(rgba, bmp)
rgba.release()
return bmp
} }

View File

@@ -1,5 +1,6 @@
/build /build
/dataset/images /dataset/images
/dataset/masks /dataset/masks
/dataset/metadata
/python/venv /python/venv
/reports /reports

View File

@@ -0,0 +1,152 @@
/*
* Copyright 2025 Pierre-Yves Nicolas
*
* This program is free software: you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.fairscan.evaluation
import org.fairscan.imageprocessing.detectDocumentQuad
import org.fairscan.imageprocessing.extractDocument
import org.fairscan.imageprocessing.isColoredDocument
import org.fairscan.imageprocessing.scaledTo
import org.opencv.core.Mat
import org.opencv.imgcodecs.Imgcodecs
import java.io.File
/**
 * Entry point of the color detection evaluation: initializes OpenCV through
 * `nu.pattern.OpenCV.loadLocally()` and then runs [ColorDetectionEvaluator].
 */
fun main() {
// OpenCV has to be loaded before the evaluator makes its first OpenCV call
nu.pattern.OpenCV.loadLocally()
ColorDetectionEvaluator.run()
}
/**
 * Evaluates the color / grayscale detection on the local dataset.
 *
 * For every row of `images.csv` whose document has a known expected color status in
 * `documents.csv`, the image and its mask are loaded, the document is extracted and the
 * detected color status is compared with the expected one. The input image, the extracted
 * document and an `index.html` summary are written to `evaluation/reports/color_detection`.
 */
object ColorDetectionEvaluator {

    fun run() {
        val root = File("evaluation")
        val datasetDir = File(root, "dataset")
        val metadataDir = File(datasetDir, "metadata")
        val outputDir = File("evaluation/reports/color_detection").apply { mkdirs() }

        val imageMetas = CsvMetadata.readImagesCsv(File(metadataDir, "images.csv"))
        val documentMetas = CsvMetadata.readDocumentsCsv(File(metadataDir, "documents.csv"))

        val results = mutableListOf<ColorResult>()
        for (meta in imageMetas) {
            // Images whose document has no metadata cannot be scored: skip them
            val expected = documentMetas[meta.docId]?.isColored ?: continue
            val imgName = meta.imgName.removeSuffix(".jpg")
            evaluateImage(imgName, expected, datasetDir, outputDir)?.let { results += it }
        }

        ColorDetectionReport.writeHtml(
            File(outputDir, "index.html"),
            Score(results.groupingBy { it.colorCase }.eachCount()),
            results
        )
    }

    /**
     * Processes one dataset image and returns its result, or `null` when the image must be
     * skipped (missing or unreadable files, or no document quad detected).
     *
     * Every OpenCV matrix allocated here is released before returning, whatever the exit
     * path, so that native memory does not grow with the size of the dataset.
     */
    private fun evaluateImage(
        imgName: String,
        expected: Boolean,
        datasetDir: File,
        outputDir: File,
    ): ColorResult? {
        val imgFile = File(datasetDir, "images/$imgName.jpg")
        val maskFile = File(datasetDir, "masks/$imgName.png")
        if (!imgFile.exists() || !maskFile.exists()) return null

        val mat = Imgcodecs.imread(imgFile.absolutePath)
        try {
            if (mat.empty()) return null
            val maskMat = Imgcodecs.imread(maskFile.absolutePath, Imgcodecs.IMREAD_UNCHANGED)
            try {
                if (maskMat.empty()) return null
                println("Processing ${imgName}...")

                val mask = MatMask(maskMat)
                // The quad is detected in mask coordinates, then expressed in image coordinates
                val quad = detectDocumentQuad(mask, isLiveAnalysis = false)
                    ?.scaledTo(mask.width, mask.height, mat.width(), mat.height())
                    ?: return null

                val document: Mat = extractDocument(mat, quad, 0, mask)
                try {
                    val detected = isColoredDocument(mat, mask, quad)

                    val inputOut = File(outputDir, "${imgName}_input.jpg")
                    Imgcodecs.imwrite(inputOut.absolutePath, mat)
                    val outputOut = File(outputDir, "${imgName}_output.jpg")
                    Imgcodecs.imwrite(outputOut.absolutePath, document)

                    return ColorResult(
                        imgName,
                        originalFile = inputOut,
                        documentFile = outputOut,
                        colorCase = ColorCase(expected, detected),
                    )
                } finally {
                    document.release()
                }
            } finally {
                maskMat.release()
            }
        } finally {
            mat.release()
        }
    }
}
/**
 * Pair made of the expected and the detected color status of a document.
 *
 * @property expected whether the document is expected to be colored
 * @property detected whether the document was detected as colored
 */
data class ColorCase(
    val expected: Boolean,
    val detected: Boolean,
) {
    /** `true` when the detection disagrees with the expectation. */
    val isMismatch: Boolean
        get() = expected xor detected
}
/**
 * Result of the color detection evaluation for a single image.
 *
 * @property imgName name identifying the evaluated image
 * @property originalFile file shown in the report as the input image
 * @property documentFile file shown in the report as the extracted document
 * @property colorCase expected and detected color status for this image
 */
data class ColorResult(
val imgName: String,
val originalFile: File,
val documentFile: File,
val colorCase: ColorCase
)
/**
 * Aggregated evaluation score.
 *
 * @property byCase number of evaluated images for each expected/detected combination
 */
data class Score(
    val byCase: Map<ColorCase, Int>
) {
    /** Total number of evaluated images. */
    val total: Int get() = byCase.values.sum()

    /** Number of images whose detected color status differs from the expected one. */
    val mismatchCount: Int get() = byCase.filterKeys { it.isMismatch }.values.sum()

    /**
     * Proportion of images detected correctly, between 0.0 and 1.0.
     * Returns 0.0 when nothing was evaluated, instead of the NaN that 0/0 would produce.
     */
    val accuracy: Double
        get() {
            val evaluated = total
            return if (evaluated == 0) 0.0 else 1.0 - mismatchCount.toDouble() / evaluated
        }
}
/**
 * Renders the outcome of the color detection evaluation as a single HTML page.
 */
object ColorDetectionReport {

    /**
     * Writes the report to [output].
     *
     * The page starts with the global figures of [score] and the count of each
     * expected/detected combination. It then shows, for the two mismatch combinations
     * only, the input image and the extracted document of every matching entry of
     * [results]. Images are referenced by file name only.
     */
    fun writeHtml(output: File, score: Score, results: List<ColorResult>) {
        val mismatchCases = listOf(ColorCase(true, false), ColorCase(false, true))
        val html = buildString {
            append("<html><body>")
            append("<h1>Color Detection Evaluation</h1>")
            append("<p>Total: ${score.total}</p>")
            append("<p>Mismatches: ${score.mismatchCount}</p>")
            append("<p>Accuracy: ${"%.2f".format(score.accuracy * 100)}%</p>")
            for ((case, count) in score.byCase) {
                append("<p>expectedColor=${case.expected} / detectedColor=${case.detected} : $count</p>")
            }
            mismatchCases.forEach { mismatch ->
                append("<h2>expectedColor=${mismatch.expected} / detectedColor=${mismatch.detected}</h2>")
                results
                    .filter { it.colorCase == mismatch }
                    .forEach { result ->
                        append(
                            """
                            <div style="margin-bottom:20px;">
                            <div style="display:flex; gap:20px;">
                            <div><img width="300" src="${result.originalFile.name}" /></div>
                            <div><img width="300" src="${result.documentFile.name}" /></div>
                            </div>
                            </div>
                            """.trimIndent()
                        )
                    }
            }
            append("</body></html>")
        }
        output.writeText(html)
    }
}

View File

@@ -0,0 +1,54 @@
/*
* Copyright 2025 Pierre-Yves Nicolas
*
* This program is free software: you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.fairscan.evaluation
import java.io.File
/**
 * Metadata of one dataset image, as read from a row of the images CSV file.
 *
 * @property imgName image name (first CSV column)
 * @property docId identifier of the document shown in the image (second CSV column)
 */
data class ImageMeta(
val imgName: String,
val docId: String
)
/**
 * Metadata of one document, as read from a row of the documents CSV file.
 *
 * @property docId identifier of the document (first CSV column)
 * @property isColored whether the document is declared as colored (second CSV column)
 */
data class DocumentMeta(
val docId: String,
val isColored: Boolean
)
/**
 * Readers for the dataset metadata CSV files.
 *
 * Both files are expected to start with a header line, to use `,` as separator and to
 * hold at least two columns per row. Blank lines are ignored. Quoted fields are not supported.
 */
object CsvMetadata {

    /**
     * Reads the images file: first column is the image name, second column the document id.
     *
     * @throws IllegalArgumentException if a non-blank row has fewer than two columns
     */
    fun readImagesCsv(file: File): List<ImageMeta> =
        readRows(file).map { cols ->
            ImageMeta(
                imgName = cols[0],
                docId = cols[1]
            )
        }

    /**
     * Reads the documents file and indexes it by document id: first column is the document
     * id, second column is `TRUE` (case-insensitive) for colored documents. When an id
     * appears several times, the last row wins.
     *
     * @throws IllegalArgumentException if a non-blank row has fewer than two columns
     */
    fun readDocumentsCsv(file: File): Map<String, DocumentMeta> =
        readRows(file)
            .map { cols -> DocumentMeta(cols[0], cols[1].equals("TRUE", ignoreCase = true)) }
            .associateBy { it.docId }

    /** Returns the trimmed columns of every non-blank row located after the header line. */
    private fun readRows(file: File): List<List<String>> =
        file.readLines()
            .withIndex()
            .drop(1) // skip header
            .filter { (_, line) -> line.isNotBlank() } // tolerate empty lines, e.g. at end of file
            .map { (index, line) ->
                val cols = line.split(',').map { it.trim() }
                require(cols.size >= 2) {
                    "${file.name}, line ${index + 1}: expected at least 2 columns but got '$line'"
                }
                cols
            }
}

View File

@@ -31,7 +31,7 @@ class MatMask(private val mat: Mat) : Mask {
override val width: Int get() = mat.width() override val width: Int get() = mat.width()
override val height: Int get() = mat.height() override val height: Int get() = mat.height()
override fun toMat(): Mat = mat override fun toMat(): Mat = mat.clone()
} }
object DatasetEvaluator { object DatasetEvaluator {
@@ -71,7 +71,7 @@ object DatasetEvaluator {
?.scaledTo(mask.width, mask.height, inputMat.width(), inputMat.height()) ?.scaledTo(mask.width, mask.height, inputMat.width(), inputMat.height())
val corrected: Mat? = if (quad != null) { val corrected: Mat? = if (quad != null) {
extractDocument(inputMat, quad = quad, rotationDegrees = 0) extractDocument(inputMat, quad = quad, rotationDegrees = 0, mask)
} else null } else null
val inputOut = File(outputDir, "${e.name}_input.jpg") val inputOut = File(outputDir, "${e.name}_input.jpg")

View File

@@ -0,0 +1,236 @@
/*
* Copyright 2025 Pierre-Yves Nicolas
*
* This program is free software: you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.fairscan.imageprocessing
import org.opencv.core.Core
import org.opencv.core.CvType
import org.opencv.core.CvType.CV_8UC1
import org.opencv.core.Mat
import org.opencv.core.Mat.zeros
import org.opencv.core.MatOfPoint
import org.opencv.core.Scalar
import org.opencv.core.Size
import org.opencv.imgproc.Imgproc
import org.opencv.imgproc.Imgproc.fillConvexPoly
import kotlin.math.roundToInt
import kotlin.math.sqrt
/**
 * Tells whether the document found in [img] should be treated as a colored document.
 *
 * Only the pixels located both in the segmentation [mask] (after erosion) and inside [quad]
 * are considered. The image is first white-balanced on those pixels, then converted to Lab.
 * A pixel counts as colored when its chroma (distance of (a, b) to the neutral point 128)
 * is above [chromaThreshold] and its L value is within [[luminanceMin], [luminanceMax]].
 * The thresholds are compared with the channel values produced by `COLOR_BGR2Lab`.
 *
 * @param img BGR image (`CV_8UC3`); it is not modified
 * @param mask segmentation mask of the document
 * @param quad document quadrilateral, expressed in the coordinates of [img]
 * @param chromaThreshold minimal chroma of a colored pixel
 * @param proportionThreshold the document is colored when the proportion of colored pixels
 * among the considered pixels is strictly above this value
 * @param luminanceMin minimal L value of the pixels taken into account
 * @param luminanceMax maximal L value of the pixels taken into account
 * @return `true` for a colored document; `false` otherwise, including when no pixel belongs
 * to both the mask and the quad
 */
fun isColoredDocument(
    img: Mat,
    mask: Mask,
    quad: Quad,
    chromaThreshold: Double = 17.5,
    proportionThreshold: Double = 0.0003,
    luminanceMin: Double = 40.0,
    luminanceMax: Double = 180.0
): Boolean {
    // Every Mat allocated here is tracked so that it gets released on all exit paths,
    // including early returns and OpenCV exceptions
    val allocated = ArrayList<Mat>()
    fun track(mat: Mat): Mat = mat.also { allocated += it }

    try {
        // Work on a reasonable size (for correct performance)
        val resizedImg = track(resizeForMaxPixels(img, 1024.0 * 768.0))
        val workSize = resizedImg.size()

        // 1) Compute doc mask (mask ∩ quad)
        val docMask = track(documentMask(mask, quad, img.size(), workSize))
        val totalPixels = Core.countNonZero(docMask)
        // Nothing to analyze: answer right away rather than running the pipeline on an empty mask
        if (totalPixels == 0) return false

        // 2) Apply white balance only inside document
        val whiteBalanced = track(applyGrayWorldToDocument(resizedImg, docMask))

        // 3) Convert to Lab, see https://en.wikipedia.org/wiki/CIELAB_color_space
        val lab = track(Mat())
        Imgproc.cvtColor(whiteBalanced, lab, Imgproc.COLOR_BGR2Lab)

        // 4) Split Lab
        val channels = ArrayList<Mat>()
        Core.split(lab, channels)
        allocated += channels
        val luminance = channels[0]
        val a = channels[1]
        val b = channels[2]

        // 5) Compute chroma and keep the pixels above the chroma threshold
        val chromaMat = track(chroma(a, b))
        val colorMask = track(Mat())
        Imgproc.threshold(chromaMat, colorMask, chromaThreshold, 255.0, Imgproc.THRESH_BINARY)
        colorMask.convertTo(colorMask, CvType.CV_8U)

        // 6) Create luminance mask L ∈ [luminanceMin, luminanceMax]
        val luminanceMask = track(Mat())
        Core.inRange(luminance, Scalar(luminanceMin), Scalar(luminanceMax), luminanceMask)

        // 7) Combine colorMask & luminanceMask & docMask
        val colorAndLuminance = track(Mat())
        Core.bitwise_and(colorMask, luminanceMask, colorAndLuminance)
        val restrictedMask = track(Mat())
        Core.bitwise_and(colorAndLuminance, docMask, restrictedMask)

        val coloredPixels = Core.countNonZero(restrictedMask)
        val proportion = coloredPixels.toDouble() / totalPixels.toDouble()
        return proportion > proportionThreshold
    } finally {
        // 8) Cleanup
        allocated.forEach { it.release() }
    }
}
/**
 * Returns a new Mat holding [img] shrunk, with its aspect ratio preserved, so that it has
 * about [maxPixels] pixels at most. An image that is already small enough is cloned, so
 * the caller always owns (and must release) the result.
 */
private fun resizeForMaxPixels(img: Mat, maxPixels: Double): Mat {
    val pixelCount = img.width() * img.height()
    if (pixelCount > maxPixels) {
        // Both dimensions get the same factor, hence the square root of the pixel ratio
        val factor = sqrt(maxPixels / pixelCount)
        val targetSize = Size(img.width() * factor, img.height() * factor)
        return Mat().also { shrunk ->
            Imgproc.resize(img, shrunk, targetSize, 0.0, 0.0, Imgproc.INTER_AREA)
        }
    }
    return img.clone()
}
/**
 * Computes the per-pixel chroma from the [a] and [b] channels: both are converted to
 * float, shifted by -128 and combined as sqrt(a² + b²).
 *
 * @return a new float Mat that the caller must release; [a] and [b] are left untouched
 */
private fun chroma(a: Mat, b: Mat): Mat {
    // Converts a channel to float and shifts it by -128
    fun centered(channel: Mat): Mat {
        val asFloat = Mat()
        channel.convertTo(asFloat, CvType.CV_32F)
        val shifted = Mat()
        Core.subtract(asFloat, Scalar(128.0), shifted)
        asFloat.release()
        return shifted
    }

    val aCentered = centered(a)
    val bCentered = centered(b)

    val magnitude = Mat()
    Core.magnitude(aCentered, bCentered, magnitude)

    aCentered.release()
    bCentered.release()
    return magnitude
}
/**
 * Erodes [mask] with an elliptic kernel whose size depends on the document size.
 *
 * The kernel size is 2% of the shortest edge of [quad], limited to the range 3..15 and
 * bumped to the next odd number when even.
 *
 * @return a new Mat holding the eroded mask, to be released by the caller
 */
private fun erodeBorder(mask: Mat, quad: Quad): Mat {
    val shortestEdge = quad.edges().minOf { it.norm() }
    val bounded = (shortestEdge * 0.02).roundToInt().coerceIn(3, 15)
    // Use an odd size so that the kernel has a central pixel
    val kernelSize = if (bounded % 2 == 0) bounded + 1 else bounded

    val side = kernelSize.toDouble()
    val kernel = Imgproc.getStructuringElement(Imgproc.MORPH_ELLIPSE, Size(side, side))
    return Mat().also { eroded ->
        Imgproc.morphologyEx(mask, eroded, Imgproc.MORPH_ERODE, kernel)
        kernel.release()
    }
}
/**
 * Builds the mask of the pixels to analyze: the eroded segmentation mask intersected
 * with the filled quadrilateral, both expressed at [workSize].
 *
 * @param mask segmentation mask; it is resized to [workSize]
 * @param quad document quadrilateral, in the coordinates of an image of size [origSize]
 * @param origSize size of the image in which [quad] is expressed
 * @param workSize size of the returned mask
 * @return a new Mat that the caller must release
 */
private fun documentMask(
    mask: Mask,
    quad: Quad,
    origSize: Size,
    workSize: Size,
): Mat {
    // Bring the segmentation mask to the working size
    val sourceMask = mask.toMat()
    val scaledMask = Mat()
    Imgproc.resize(sourceMask, scaledMask, workSize, 0.0, 0.0, Imgproc.INTER_AREA)
    sourceMask.release()

    // Bring the quad to the working size as well, then shrink the mask away from its borders
    val scaledQuad = quad.scaledTo(origSize.width, origSize.height, workSize.width, workSize.height)
    val shrunkMask = erodeBorder(scaledMask, scaledQuad)
    scaledMask.release()

    // Rasterize the quad as a filled polygon
    val quadArea = Mat.zeros(shrunkMask.size(), CV_8UC1)
    val corners = MatOfPoint(
        scaledQuad.topLeft.toCv(),
        scaledQuad.topRight.toCv(),
        scaledQuad.bottomRight.toCv(),
        scaledQuad.bottomLeft.toCv(),
    )
    Imgproc.fillConvexPoly(quadArea, corners, Scalar(255.0))
    corners.release()

    // Keep the pixels that belong to both
    val intersection = Mat()
    Core.bitwise_and(shrunkMask, quadArea, intersection)
    quadArea.release()
    shrunkMask.release()
    return intersection
}
/**
 * Applies a gray-world white balance to the pixels of [img] selected by [docMask].
 *
 * The mean of each channel is computed on the selected pixels, and each channel is scaled
 * so that the three means become equal to their average. Pixels outside [docMask] are
 * copied unchanged.
 *
 * @param img BGR image of type `CV_8UC3`; it is not modified
 * @param docMask mask of the pixels to use and to correct (non-zero = selected); it stays
 * owned by the caller and is neither modified nor released here
 * @return a new Mat that the caller must release; a plain copy of [img] when [docMask]
 * selects no pixel
 * @throws IllegalArgumentException if [img] is not of type `CV_8UC3`
 */
fun applyGrayWorldToDocument(
    img: Mat,
    docMask: Mat,
): Mat {
    require(img.type() == CvType.CV_8UC3) { "Expected a CV_8UC3 image but got type ${img.type()}" }

    // Nothing to balance. docMask must NOT be released here: the caller still uses it
    // afterwards and releases it itself.
    if (Core.countNonZero(docMask) == 0) {
        return img.clone()
    }

    // compute mean per channel on docMask (B,G,R)
    val means = Core.mean(img, docMask)

    // safety: avoid division by very small values
    val eps = 1e-6
    val meanB = maxOf(means.`val`[0], eps)
    val meanG = maxOf(means.`val`[1], eps)
    val meanR = maxOf(means.`val`[2], eps)

    // scale each channel so that its mean becomes the average of the three means
    val meanGray = (meanB + meanG + meanR) / 3.0
    val scales = Scalar(meanGray / meanB, meanGray / meanG, meanGray / meanR) // BGR order

    // scale in float, then convert back to 8 bits
    val imgF = Mat()
    img.convertTo(imgF, CvType.CV_32FC3)
    val scaledF = Mat()
    Core.multiply(imgF, scales, scaledF)
    val scaled8 = Mat()
    scaledF.convertTo(scaled8, CvType.CV_8UC3)

    // result = original copy, then copy scaled pixels where docMask != 0
    val result = img.clone()
    scaled8.copyTo(result, docMask)

    // cleanup
    imgF.release()
    scaledF.release()
    scaled8.release()
    return result
}

View File

@@ -18,7 +18,6 @@ import org.fairscan.imageprocessing.quad.detectDocumentQuadFromProbmap
import org.fairscan.imageprocessing.quad.findQuadFromRightAngles import org.fairscan.imageprocessing.quad.findQuadFromRightAngles
import org.fairscan.imageprocessing.quad.minAreaRect import org.fairscan.imageprocessing.quad.minAreaRect
import org.opencv.core.Core import org.opencv.core.Core
import org.opencv.core.CvType
import org.opencv.core.Mat import org.opencv.core.Mat
import org.opencv.core.MatOfPoint import org.opencv.core.MatOfPoint
import org.opencv.core.MatOfPoint2f import org.opencv.core.MatOfPoint2f
@@ -62,10 +61,7 @@ fun detectDocumentQuad(mask: Mask, isLiveAnalysis: Boolean, minQuadAreaRatio: Do
} }
private fun biggestContour(mat: Mat): Pair<MatOfPoint2f?, Double> { private fun biggestContour(mat: Mat): Pair<MatOfPoint2f?, Double> {
val mat8u = Mat() val refinedMask = refineMask(mat)
mat.convertTo(mat8u, CvType.CV_8UC1, 255.0)
val refinedMask = refineMask(mat8u)
val blurred = Mat() val blurred = Mat()
Imgproc.GaussianBlur(refinedMask, blurred, Size(5.0, 5.0), 0.0) Imgproc.GaussianBlur(refinedMask, blurred, Size(5.0, 5.0), 0.0)
@@ -116,7 +112,12 @@ fun refineMask(original: Mat): Mat {
return opened return opened
} }
fun extractDocument(inputMat: Mat, quad: Quad, rotationDegrees: Int): Mat { fun extractDocument(
inputMat: Mat,
quad: Quad,
rotationDegrees: Int,
mask: Mask,
): Mat {
val widthTop = norm(quad.topLeft, quad.topRight) val widthTop = norm(quad.topLeft, quad.topRight)
val widthBottom = norm(quad.bottomLeft, quad.bottomRight) val widthBottom = norm(quad.bottomLeft, quad.bottomRight)
val targetWidth = (widthTop + widthBottom) / 2 val targetWidth = (widthTop + widthBottom) / 2
@@ -144,7 +145,8 @@ fun extractDocument(inputMat: Mat, quad: Quad, rotationDegrees: Int): Mat {
Imgproc.warpPerspective(inputMat, outputMat, transform, outputSize) Imgproc.warpPerspective(inputMat, outputMat, transform, outputSize)
val resized = resize(outputMat, 1500.0) val resized = resize(outputMat, 1500.0)
val enhanced = enhanceCapturedImage(resized) val isColored = isColoredDocument(inputMat, mask, quad)
val enhanced = enhanceCapturedImage(resized, isColored)
val rotated = rotate(enhanced, rotationDegrees) val rotated = rotate(enhanced, rotationDegrees)
return rotated return rotated

View File

@@ -81,9 +81,9 @@ fun createQuad(vertices: List<Point>): Quad {
return Quad(sorted[0], sorted[1], sorted[2], sorted[3]) return Quad(sorted[0], sorted[1], sorted[2], sorted[3])
} }
fun Quad.scaledTo(fromWidth: Int, fromHeight: Int, toWidth: Int, toHeight: Int): Quad { fun Quad.scaledTo(fromWidth: Double, fromHeight: Double, toWidth: Double, toHeight: Double): Quad {
val scaleX = toWidth.toFloat() / fromWidth val scaleX = toWidth / fromWidth
val scaleY = toHeight.toFloat() / fromHeight val scaleY = toHeight / fromHeight
return Quad( return Quad(
topLeft = topLeft.scaled(scaleX, scaleY), topLeft = topLeft.scaled(scaleX, scaleY),
topRight = topRight.scaled(scaleX, scaleY), topRight = topRight.scaled(scaleX, scaleY),
@@ -92,6 +92,14 @@ fun Quad.scaledTo(fromWidth: Int, fromHeight: Int, toWidth: Int, toHeight: Int):
) )
} }
fun Point.scaled(scaleX: Float, scaleY: Float): Point { fun Quad.scaledTo(fromWidth: Int, fromHeight: Int, toWidth: Int, toHeight: Int): Quad {
return scaledTo(
fromWidth.toDouble(),
fromHeight.toDouble(),
toWidth.toDouble(),
toHeight.toDouble())
}
fun Point.scaled(scaleX: Double, scaleY: Double): Point {
return Point((x * scaleX), (y * scaleY)) return Point((x * scaleX), (y * scaleY))
} }

View File

@@ -22,8 +22,8 @@ import org.opencv.core.Size
import org.opencv.imgproc.Imgproc import org.opencv.imgproc.Imgproc
import kotlin.math.max import kotlin.math.max
fun enhanceCapturedImage(img: Mat): Mat { fun enhanceCapturedImage(img: Mat, isColored: Boolean): Mat {
return if (isColoredDocument(img)) { return if (isColored) {
val result = Mat() val result = Mat()
Core.convertScaleAbs(img, result, 1.2, 10.0) Core.convertScaleAbs(img, result, 1.2, 10.0)
result result
@@ -36,63 +36,6 @@ fun enhanceCapturedImage(img: Mat): Mat {
} }
} }
fun isColoredDocument(
img: Mat,
chromaThreshold: Double = 20.0,
proportionThreshold: Double = 0.001
): Boolean {
val lab = Mat()
Imgproc.cvtColor(img, lab, Imgproc.COLOR_BGR2Lab)
val channels = ArrayList<Mat>()
Core.split(lab, channels)
val a = channels[1]
val b = channels[2]
val aFloat = Mat()
val bFloat = Mat()
a.convertTo(aFloat, CvType.CV_32F)
b.convertTo(bFloat, CvType.CV_32F)
val aShifted = Mat()
val bShifted = Mat()
Core.subtract(aFloat, Scalar(128.0), aShifted)
Core.subtract(bFloat, Scalar(128.0), bShifted)
val aSq = Mat()
val bSq = Mat()
Core.multiply(aShifted, aShifted, aSq)
Core.multiply(bShifted, bShifted, bSq)
val sumSq = Mat()
Core.add(aSq, bSq, sumSq)
val chroma = Mat()
Core.sqrt(sumSq, chroma)
val mask = Mat()
Imgproc.threshold(chroma, mask, chromaThreshold, 1.0, Imgproc.THRESH_BINARY)
val coloredPixels = Core.countNonZero(mask)
val totalPixels = chroma.rows() * chroma.cols()
val proportion = coloredPixels.toDouble() / totalPixels.toDouble()
lab.release()
channels.forEach { it.release() }
aFloat.release()
bFloat.release()
aShifted.release()
bShifted.release()
aSq.release()
bSq.release()
sumSq.release()
chroma.release()
mask.release()
return proportion > proportionThreshold
}
private fun multiScaleRetinex(img: Mat): Mat { private fun multiScaleRetinex(img: Mat): Mat {
val imageSize = img.size() val imageSize = img.size()
val maxDim = max(imageSize.width, imageSize.height) val maxDim = max(imageSize.width, imageSize.height)