Improve grayscale postprocessing (#138)

Apply exp() to the Retinex output before normalization, restoring a
linear tone distribution. This improves contrast overall: light gray
content is preserved, and dark content stays dark even when the image
contains deep blacks. The previous normalization compressed all tones
toward the same gray level. Trade-off: shadows and unwanted background
variations are also more visible in some images.

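Other changes:
- stretch toward white, using the histogram mode as the estimate of the
  background level
- fall back to a simple p1/p99 normalization of the original grayscale
  image on documents with large dark areas
- add bilateral denoising as a final step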
This commit is contained in:
Pierre-Yves Nicolas
2026-03-20 11:40:58 +01:00
committed by GitHub
parent 0e8037fa8d
commit 58abc2f3fe

View File

@@ -29,11 +29,7 @@ fun enhanceCapturedImage(img: Mat, isColored: Boolean): Mat {
return if (isColored) { return if (isColored) {
multiScaleRetinexOnL(img) multiScaleRetinexOnL(img)
} else { } else {
val gray = multiScaleRetinex(img) enhanceGrayscaleImage(img)
val contrastedGray = enhanceContrastAuto(gray)
val result = Mat()
Imgproc.cvtColor(contrastedGray, result, Imgproc.COLOR_GRAY2BGR)
result
} }
} }
@@ -199,31 +195,29 @@ fun percentileL(l: Mat, p: Double): Double {
return 255.0 return 255.0
} }
private fun multiScaleRetinex(img: Mat): Mat { fun enhanceGrayscaleImage(img: Mat): Mat {
val imageSize = img.size()
val maxDim = max(imageSize.width, imageSize.height)
val kernelSizes: List<Double> = listOf(maxDim / 50, maxDim / 3)
// Convert to grayscale (1 channel) // -- 1. Convert to grayscale --------
val gray = Mat() val gray = Mat()
if (img.channels() == 4) { when (img.channels()) {
Imgproc.cvtColor(img, gray, Imgproc.COLOR_BGRA2GRAY) 4 -> Imgproc.cvtColor(img, gray, Imgproc.COLOR_BGRA2GRAY)
} else if (img.channels() == 3) { 3 -> Imgproc.cvtColor(img, gray, Imgproc.COLOR_BGR2GRAY)
Imgproc.cvtColor(img, gray, Imgproc.COLOR_BGR2GRAY) else -> img.copyTo(gray)
} else {
img.copyTo(gray)
} }
// -- 2. Multi-scale Retinex ---------
val maxDim = max(gray.cols(), gray.rows()).toDouble()
val imgFloat = Mat() val imgFloat = Mat()
gray.convertTo(imgFloat, CvType.CV_32F) gray.convertTo(imgFloat, CvType.CV_32F)
Core.add(imgFloat, Scalar(1.0), imgFloat) // img + 1 Core.add(imgFloat, Scalar(1.0), imgFloat)
val weight = 1.0 / kernelSizes.size
val retinex = Mat.zeros(gray.size(), CvType.CV_32F)
val logImg = Mat() val logImg = Mat()
Core.log(imgFloat, logImg) Core.log(imgFloat, logImg)
val kernelSizes = listOf(maxDim / 6, maxDim / 50)
val weight = 1.0 / kernelSizes.size
val retinex = Mat.zeros(gray.size(), CvType.CV_32F)
val blur = Mat() val blur = Mat()
val logBlur = Mat() val logBlur = Mat()
val diff = Mat() val diff = Mat()
@@ -232,7 +226,6 @@ private fun multiScaleRetinex(img: Mat): Mat {
Imgproc.boxFilter(imgFloat, blur, -1, Size(kernelSize, kernelSize)) Imgproc.boxFilter(imgFloat, blur, -1, Size(kernelSize, kernelSize))
Core.add(blur, Scalar(1.0), blur) Core.add(blur, Scalar(1.0), blur)
Core.log(blur, logBlur) Core.log(blur, logBlur)
Core.subtract(logImg, logBlur, diff) Core.subtract(logImg, logBlur, diff)
val diffGray = Mat() val diffGray = Mat()
if (diff.channels() > 1) { if (diff.channels() > 1) {
@@ -244,65 +237,93 @@ private fun multiScaleRetinex(img: Mat): Mat {
diffGray.release() diffGray.release()
} }
// Normalize // -- 3. exp() + p1/p99 normalization ---------
val minMax = Core.minMaxLoc(retinex) // exp() compensates for the compression of bright tones caused by
// the Retinex log-space computation, making annotations and light
// gray areas more visible.
val retinexExp = Mat()
Core.exp(retinex, retinexExp)
val flat = Mat()
retinexExp.reshape(1, 1).copyTo(flat)
val sorted = Mat()
Core.sort(flat, sorted, Core.SORT_ASCENDING)
val n = sorted.cols()
val pLow = sorted.get(0, (n * 0.01).toInt())[0]
val pHigh = sorted.get(0, (n * 0.99).toInt())[0]
flat.release(); sorted.release()
val normalized = Mat() val normalized = Mat()
Core.subtract(retinex, Scalar(minMax.minVal), normalized) Core.subtract(retinexExp, Scalar(pLow), normalized)
val scale = if (minMax.maxVal > minMax.minVal) 255.0 / (minMax.maxVal - minMax.minVal) else 1.0 val scale = if (pHigh > pLow) 255.0 / (pHigh - pLow) else 1.0
Core.multiply(normalized, Scalar(scale), normalized) Core.multiply(normalized, Scalar(scale), normalized)
Core.min(normalized, Scalar(255.0), normalized)
Core.max(normalized, Scalar(0.0), normalized)
retinexExp.release()
val result = Mat() val result8u = Mat()
normalized.convertTo(result, CvType.CV_8U) normalized.convertTo(result8u, CvType.CV_8U)
// Cleanup
gray.release()
imgFloat.release()
retinex.release()
logImg.release()
blur.release()
logBlur.release()
diff.release()
normalized.release() normalized.release()
return result // -- 4. Stretch toward white --------
} // Find the histogram mode in [180..255] as an estimate of the background level,
// then stretch so that level maps to 255.
// If modeVal >= 254, Retinex has over-amplified the image (typically happens
// when the document contains large dark areas). In that case, fall back to
// a simple normalization of the original grayscale image.
val hist = Mat()
Imgproc.calcHist(listOf(result8u), MatOfInt(0), Mat(), hist,
MatOfInt(256), MatOfFloat(0f, 256f))
private fun enhanceContrastAuto(img: Mat): Mat { var modeVal = 220; var modeCount = 0.0
val gray = if (img.channels() == 1) img else { for (i in 180 until 256) {
val tmp = Mat() val c = hist.get(i, 0)[0]
Imgproc.cvtColor(img, tmp, Imgproc.COLOR_BGR2GRAY) if (c > modeCount) { modeCount = c; modeVal = i }
tmp }
hist.release()
val stretched8u = Mat()
if (modeVal >= 254) {
val grayF = Mat()
gray.convertTo(grayF, CvType.CV_32F)
val grayFlat = Mat()
grayF.reshape(1, 1).copyTo(grayFlat)
val graySorted = Mat()
Core.sort(grayFlat, graySorted, Core.SORT_ASCENDING)
val gN = graySorted.cols()
val gLow = graySorted.get(0, (gN * 0.01).toInt())[0]
val gHigh = graySorted.get(0, (gN * 0.99).toInt())[0]
grayFlat.release(); graySorted.release()
Core.subtract(grayF, Scalar(gLow), grayF)
Core.multiply(grayF, Scalar(255.0 / (gHigh - gLow + 1e-6)), grayF)
Core.min(grayF, Scalar(255.0), grayF)
Core.max(grayF, Scalar(0.0), grayF)
grayF.convertTo(stretched8u, CvType.CV_8U)
grayF.release()
} else {
val stretchedF = Mat()
result8u.convertTo(stretchedF, CvType.CV_32F)
Core.multiply(stretchedF, Scalar(255.0 / modeVal), stretchedF)
Core.min(stretchedF, Scalar(255.0), stretchedF)
stretchedF.convertTo(stretched8u, CvType.CV_8U)
stretchedF.release()
} }
// Flatten and sort pixel values // -- 5. Bilateral denoising ---------
val flat = Mat() // Smooths background texture and fine grain amplified by exp() and stretch,
gray.reshape(1, 1).convertTo(flat, CvType.CV_32F) // while preserving sharp edges (text, lines, annotations).
val sortedVals = Mat() val denoised = Mat()
Core.sort(flat, sortedVals, Core.SORT_ASCENDING) Imgproc.bilateralFilter(stretched8u, denoised, 9, 20.0, 10.0)
val totalPixels = sortedVals.cols() val finalBgr = Mat()
val pLow = sortedVals.get(0, (totalPixels * 0.005).toInt())[0] Imgproc.cvtColor(denoised, finalBgr, Imgproc.COLOR_GRAY2BGR)
val pHigh = sortedVals.get(0, (totalPixels * 0.80).toInt())[0]
flat.release() // -- Cleanup -----------
sortedVals.release() gray.release(); imgFloat.release(); logImg.release()
blur.release(); logBlur.release(); diff.release()
retinex.release(); result8u.release()
stretched8u.release(); denoised.release()
val imgF = Mat() return finalBgr
img.convertTo(imgF, CvType.CV_32F)
val adjusted = Mat()
Core.subtract(imgF, Scalar(pLow), adjusted)
Core.multiply(adjusted, Scalar(255.0 / max((pHigh - pLow), 1.0)), adjusted)
Core.min(adjusted, Scalar(255.0), adjusted)
Core.max(adjusted, Scalar(0.0), adjusted)
val result = Mat()
adjusted.convertTo(result, CvType.CV_8U)
imgF.release()
adjusted.release()
val final = Mat()
Core.convertScaleAbs(result, final, 1.15, -25.0)
result.release()
return final
} }