Improve perspective correction with camera intrinsics (#182)

This commit is contained in:
Pierre-Yves Nicolas
2026-05-20 20:33:03 +02:00
committed by GitHub
parent d9844be4de
commit 27ad5efeff
12 changed files with 114 additions and 48 deletions

View File

@@ -15,6 +15,7 @@
package org.fairscan.app.data
import kotlinx.serialization.Serializable
import org.fairscan.imageprocessing.CameraIntrinsics
import org.fairscan.imageprocessing.ColorMode
@Serializable
@@ -44,6 +45,8 @@ data class PageV2(
val userQuad: NormalizedQuad? = null,
val isColored: Boolean? = null,
val colorMode: ColorMode? = null,
val focalLength: Float? = null,
val sensorWidth: Float? = null,
)
@Serializable

View File

@@ -35,6 +35,7 @@ import org.fairscan.app.domain.ScanPage
import org.fairscan.imageprocessing.ColorMode
import org.fairscan.imageprocessing.Point
import org.fairscan.imageprocessing.Quad
import org.fairscan.imageprocessing.cameraIntrinsics
import java.io.File
import java.util.Collections.synchronizedMap
@@ -153,6 +154,8 @@ class ImageRepository(
manualRotationDegrees = Rotation.R0.degrees,
isColored = metadata.autoColorMode == ColorMode.COLOR,
colorMode = colorMode,
focalLength = metadata.cameraIntrinsics?.focalLength,
sensorWidth = metadata.cameraIntrinsics?.sensorWidth,
)
)
saveMetadata()
@@ -215,8 +218,7 @@ class ImageRepository(
val processedJpeg =
transformations.process(
sourceJpeg,
normalizedQuad = update.normalizedQuad,
baseRotation = metadata.baseRotation,
metadata = metadata.copy(normalizedQuad = update.normalizedQuad),
colorMode = update.colorMode
)
processedFile.writeBytes(processedJpeg.bytes)
@@ -403,6 +405,7 @@ fun PageV2.toMetadata(): PageMetadata? {
return PageMetadata(
(userQuad ?: quad).toQuad(),
Rotation.fromDegrees(baseRotationDegrees),
if (isColored) ColorMode.COLOR else ColorMode.GRAYSCALE
if (isColored) ColorMode.COLOR else ColorMode.GRAYSCALE,
cameraIntrinsics(focalLength, sensorWidth)
)
}

View File

@@ -15,9 +15,8 @@
package org.fairscan.app.data
import org.fairscan.app.domain.Jpeg
import org.fairscan.app.domain.Rotation
import org.fairscan.app.domain.PageMetadata
import org.fairscan.imageprocessing.ColorMode
import org.fairscan.imageprocessing.Quad
interface ImageTransformations {
@@ -27,8 +26,7 @@ interface ImageTransformations {
fun process(
source: Jpeg,
normalizedQuad: Quad,
baseRotation: Rotation,
metadata: PageMetadata,
colorMode: ColorMode
): Jpeg

View File

@@ -51,8 +51,7 @@ suspend fun jpegsForExport(
val colorMode = page.colorMode
if (source != null && metadata != null && colorMode != null) {
val rotation = page.totalRotation()
val normalizedQuad = metadata.normalizedQuad
processedImage(source, normalizedQuad, rotation, colorMode, exportQuality)
processedImage(source, metadata, rotation, colorMode, exportQuality)
}
else
jpeg(page, imageRepository)

View File

@@ -14,6 +14,7 @@
*/
package org.fairscan.app.domain
import org.fairscan.imageprocessing.CameraIntrinsics
import org.fairscan.imageprocessing.ColorMode
import org.fairscan.imageprocessing.Quad
@@ -21,6 +22,7 @@ data class PageMetadata(
val normalizedQuad: Quad,
val baseRotation: Rotation,
val autoColorMode: ColorMode,
val cameraIntrinsics: CameraIntrinsics?,
)
data class ScanPage(

View File

@@ -25,6 +25,7 @@ import org.fairscan.app.domain.Jpeg
import org.fairscan.app.domain.PageMetadata
import org.fairscan.app.domain.Rotation
import org.fairscan.app.ui.screens.settings.DefaultColorMode
import org.fairscan.imageprocessing.CameraIntrinsics
import org.fairscan.imageprocessing.ColorMode
import org.fairscan.imageprocessing.Mask
import org.fairscan.imageprocessing.Point
@@ -79,17 +80,17 @@ class ImageProcessor(private val thumbnailSizePx: Int) : ImageTransformations {
override fun process(
source: Jpeg,
normalizedQuad: Quad,
baseRotation: Rotation,
metadata: PageMetadata,
colorMode: ColorMode
): Jpeg {
return processedImage(source, normalizedQuad, baseRotation, colorMode, ExportQuality.BALANCED)
val baseRotation = metadata.baseRotation
return processedImage(source, metadata, baseRotation, colorMode, ExportQuality.BALANCED)
}
}
fun processedImage(
source: Jpeg,
normalizedQuad: Quad,
metadata: PageMetadata,
rotation: Rotation,
colorMode: ColorMode,
exportQuality: ExportQuality,
@@ -99,8 +100,9 @@ fun processedImage(
var page: Mat? = null
try {
sourceMat = source.toMat()
val quad = normalizedQuad.scaledTo(1, 1, sourceMat.width(), sourceMat.height())
page = extractDocument(sourceMat, quad, rotationDegrees, colorMode, exportQuality.maxPixels)
val quad = metadata.normalizedQuad.scaledTo(1, 1, sourceMat.width(), sourceMat.height())
page = extractDocument(sourceMat, quad, rotationDegrees, colorMode, exportQuality.maxPixels,
metadata.cameraIntrinsics)
return Jpeg.fromMat(page, exportQuality.jpegQuality)
} finally {
sourceMat?.release()
@@ -114,7 +116,8 @@ fun extractDocumentFromBitmap(
rotationDegrees: Int,
mask: Mask?,
viewModelScope: CoroutineScope,
defaultColorMode: DefaultColorMode = DefaultColorMode.AUTO
defaultColorMode: DefaultColorMode = DefaultColorMode.AUTO,
cameraIntrinsics: CameraIntrinsics?,
): CapturedPage {
val exportQuality = ExportQuality.BALANCED
var colorMode = ColorMode.COLOR
@@ -140,7 +143,8 @@ fun extractDocumentFromBitmap(
normalizedQuad = quad.scaledTo(source.width, source.height, 1, 1)
autoColorMode = autoColorMode(bgr, mask, quad)
colorMode = defaultColorMode.colorMode ?: autoColorMode
page = extractDocument(bgr, quad, rotationDegrees, colorMode, exportQuality.maxPixels)
page = extractDocument(bgr, quad, rotationDegrees, colorMode, exportQuality.maxPixels,
cameraIntrinsics)
}
val pageJpeg = Jpeg.fromMat(page, exportQuality.jpegQuality)
@@ -148,7 +152,7 @@ fun extractDocumentFromBitmap(
page.release()
val baseRotation = Rotation.fromDegrees(rotationDegrees)
val metadata = PageMetadata(normalizedQuad, baseRotation, autoColorMode)
val metadata = PageMetadata(normalizedQuad, baseRotation, autoColorMode, cameraIntrinsics)
val sourceJpegDeferred = viewModelScope.async(Dispatchers.IO) {
compressSource(source)
}

View File

@@ -19,6 +19,9 @@ import android.util.Log
import android.util.Size
import android.view.ViewGroup.LayoutParams.MATCH_PARENT
import android.widget.LinearLayout
import androidx.annotation.OptIn
import androidx.camera.camera2.interop.Camera2CameraInfo
import androidx.camera.camera2.interop.ExperimentalCamera2Interop
import androidx.camera.core.CameraControl
import androidx.camera.core.CameraSelector
import androidx.camera.core.FocusMeteringAction
@@ -65,12 +68,15 @@ import androidx.core.graphics.scale
import androidx.lifecycle.LifecycleOwner
import androidx.lifecycle.compose.LocalLifecycleOwner
import org.fairscan.app.ui.components.CameraPermissionState
import org.fairscan.imageprocessing.CameraIntrinsics
import org.fairscan.imageprocessing.Point
import org.fairscan.imageprocessing.Quad
import org.fairscan.imageprocessing.cameraIntrinsics
import org.fairscan.imageprocessing.scaledTo
import java.util.concurrent.ExecutorService
import java.util.concurrent.Executors
import java.util.concurrent.TimeUnit
import kotlin.math.max
@Composable
fun CameraPreview(
@@ -162,6 +168,7 @@ fun CameraPreview(
}
@OptIn(ExperimentalCamera2Interop::class)
fun bindCameraUseCases(
lifecycleOwner: LifecycleOwner,
cameraProvider: ProcessCameraProvider,
@@ -207,6 +214,7 @@ fun bindCameraUseCases(
val camera = cameraProvider.bindToLifecycle(lifecycleOwner, cameraSelector,
imageAnalysis, preview, imageCapture)
captureController.cameraControl = camera.cameraControl
captureController.setCameraCharacteristics(Camera2CameraInfo.from(camera.cameraInfo))
}
@Composable
@@ -287,21 +295,22 @@ class CameraCaptureController {
var imageCapture: ImageCapture? = null
private val executor = Executors.newSingleThreadExecutor()
var previewView: PreviewView? = null
var cameraIntrinsics: CameraIntrinsics? = null
fun shutdown() {
executor.shutdown()
}
fun takePicture(onImageCaptured: (ImageProxy?) -> Unit) {
fun takePicture(onImageCaptured: (ImageProxy?, CameraIntrinsics?) -> Unit) {
imageCapture?.takePicture(
executor,
object : ImageCapture.OnImageCapturedCallback() {
override fun onCaptureSuccess(imageProxy: ImageProxy) {
onImageCaptured(imageProxy)
onImageCaptured(imageProxy, cameraIntrinsics)
}
override fun onError(exception: ImageCaptureException) {
Log.e("CameraCapture", "Image capture failed: ${exception.message}", exception)
onImageCaptured(null)
onImageCaptured(null, cameraIntrinsics)
}
}
)
@@ -320,6 +329,22 @@ class CameraCaptureController {
control.startFocusAndMetering(action)
}
/**
 * Refreshes [cameraIntrinsics] from the characteristics of the bound camera.
 *
 * Reads the available focal lengths and the physical sensor size from [cameraInfo].
 * Intrinsics are stored only when exactly one focal length is reported and the sensor
 * size is known; in every other case the property is reset to `null`.
 * The larger of the two sensor dimensions is passed as the sensor width.
 */
@OptIn(ExperimentalCamera2Interop::class)
fun setCameraCharacteristics(cameraInfo: Camera2CameraInfo) {
    val availableFocalLengths = cameraInfo.getCameraCharacteristic(
        android.hardware.camera2.CameraCharacteristics.LENS_INFO_AVAILABLE_FOCAL_LENGTHS
    )
    val physicalSize = cameraInfo.getCameraCharacteristic(
        android.hardware.camera2.CameraCharacteristics.SENSOR_INFO_PHYSICAL_SIZE
    )
    // singleOrNull() yields a value only for an array holding exactly one focal length.
    val onlyFocalLength = availableFocalLengths?.singleOrNull()
    cameraIntrinsics =
        if (onlyFocalLength != null && physicalSize != null) {
            cameraIntrinsics(onlyFocalLength, max(physicalSize.width, physicalSize.height))
        } else {
            null
        }
}
}
sealed interface CameraBindState {

View File

@@ -229,7 +229,8 @@ fun CameraScreen(
Log.i("FairScan", "Pressed <Capture>")
cameraViewModel.onCapturePressed(it)
captureController.takePicture(
onImageCaptured = { imageProxy -> cameraViewModel.onImageCaptured(imageProxy) }
onImageCaptured = { imageProxy, cameraCharacteristics ->
cameraViewModel.onImageCaptured(imageProxy, cameraCharacteristics) }
)
}
},
@@ -647,7 +648,7 @@ fun CameraScreenPreviewWithProcessedImage() {
CapturedPage(
debugImage("gallica.bnf.fr-bpt6k5530456s-1.jpg"),
CompletableDeferred(Jpeg(ByteArray(0))),
PageMetadata(quad, R0, ColorMode.COLOR),
PageMetadata(quad, R0, ColorMode.COLOR, null),
ColorMode.COLOR)))
}

View File

@@ -34,6 +34,7 @@ import kotlinx.coroutines.withContext
import org.fairscan.app.AppContainer
import org.fairscan.app.domain.CapturedPage
import org.fairscan.app.platform.extractDocumentFromBitmap
import org.fairscan.imageprocessing.CameraIntrinsics
import org.fairscan.imageprocessing.ImageSize
import org.fairscan.imageprocessing.detectDocumentQuad
import java.util.concurrent.CancellationException
@@ -133,12 +134,13 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
}
}
fun onImageCaptured(imageProxy: ImageProxy?) {
fun onImageCaptured(imageProxy: ImageProxy?, cameraIntrinsics: CameraIntrinsics?) {
if (imageProxy != null) {
viewModelScope.launch {
try {
val source = imageProxy.toBitmap()
val page = processCapturedImage(source, imageProxy.imageInfo.rotationDegrees)
val rotationDegrees = imageProxy.imageInfo.rotationDegrees
val page = processCapturedImage(source, rotationDegrees, cameraIntrinsics)
imageProxy.close()
onCaptureProcessed(page)
} catch (e: RuntimeException) {
@@ -154,6 +156,7 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
private suspend fun processCapturedImage(
source: Bitmap,
rotationDegrees: Int,
cameraIntrinsics: CameraIntrinsics?,
): CapturedPage = withContext(Dispatchers.IO) {
val segmentation = imageSegmentationService.runSegmentationAndReturn(source)
val mask = segmentation?.segmentation
@@ -161,7 +164,7 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
val quad = mask?.let { detectDocumentQuad(mask, originalSize, isLiveAnalysis = false) }
val defaultColorMode = settingsRepository.defaultColorMode.first()
val result = extractDocumentFromBitmap(
source, quad, rotationDegrees, mask, viewModelScope, defaultColorMode)
source, quad, rotationDegrees, mask, viewModelScope, defaultColorMode, cameraIntrinsics)
return@withContext result
}
@@ -202,11 +205,9 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
try {
val photoToImport = imageLoader.load(uri)
ensureActive()
val page = processCapturedImage(photoToImport, 0)
val page = processCapturedImage(photoToImport, 0, null)
ensureActive()
page?.let {
_events.emit(CameraEvent.ImageCaptured(it))
}
_events.emit(CameraEvent.ImageCaptured(page))
} catch (e: CancellationException) {
throw e
} catch (e: Exception) {

View File

@@ -31,6 +31,7 @@ import org.fairscan.app.domain.Rotation.R0
import org.fairscan.app.domain.Rotation.R180
import org.fairscan.app.domain.Rotation.R270
import org.fairscan.app.domain.Rotation.R90
import org.fairscan.imageprocessing.CameraIntrinsics
import org.fairscan.imageprocessing.ColorMode
import org.fairscan.imageprocessing.ColorMode.COLOR
import org.fairscan.imageprocessing.ColorMode.GRAYSCALE
@@ -51,7 +52,8 @@ class ImageRepositoryTest {
private val testScope = TestScope()
val quad1 = Quad(Point(.01, .02), Point(.1, .03), Point(.11, .12), Point(.03, .09))
val metadata1 = PageMetadata(quad1, R90, COLOR)
val intrinsics = CameraIntrinsics(42.0f, 43.0f)
val metadata1 = PageMetadata(quad1, R90, COLOR, intrinsics)
fun getFilesDir(): File {
if (_filesDir == null) {
@@ -63,7 +65,7 @@ class ImageRepositoryTest {
fun repo(
rotate: (Jpeg, Int) -> Jpeg = { input, _ -> input },
resizeToThumbnail: (Jpeg) -> Jpeg = { input -> jpeg(input.bytes[0]) },
process: (Jpeg, Quad, Rotation, ColorMode) -> Jpeg = { _, _, _, _ ->
process: (Jpeg, PageMetadata, ColorMode) -> Jpeg = { _, _, _ ->
throw UnsupportedOperationException()
}
): ImageRepository {
@@ -74,10 +76,9 @@ class ImageRepositoryTest {
resizeToThumbnail(input)
override fun process(
source: Jpeg,
normalizedQuad: Quad,
baseRotation: Rotation,
metadata: PageMetadata,
colorMode: ColorMode
): Jpeg = process(source, normalizedQuad, baseRotation, colorMode)
): Jpeg = process(source, metadata, colorMode)
}
return ImageRepository(getFilesDir(), transformations, testScope)
@@ -244,7 +245,7 @@ class ImageRepositoryTest {
fun setColorMode_should_process_and_update_metadata() = runTest {
val jpeg1 = jpeg(10)
val repo = repo(
process = { _, _ , _, mode ->
process = { _, _, mode ->
assertThat(mode).isEqualTo(GRAYSCALE)
jpeg(41)
}
@@ -262,7 +263,7 @@ class ImageRepositoryTest {
fun setColorMode_should_not_run_twice_in_parallel() = runTest {
var processCalls = 0
val repo = repo(
process = { _, _, _, _ ->
process = { _, _, _ ->
processCalls++
runBlocking { delay(10) }
jpeg(1)

View File

@@ -156,9 +156,14 @@ fun extractDocument(
rotationDegrees: Int,
colorMode: ColorMode,
maxPixels: Long,
cameraIntrinsics: CameraIntrinsics? = null,
): Mat {
val (targetWidth, targetHeight) = estimateRealDimensions(quad, inputMat.cols(), inputMat.rows())
val (targetWidth, targetHeight) = estimateRealDimensions(
quad,
inputMat.cols(),
inputMat.rows(),
cameraIntrinsics
)
val srcPoints = MatOfPoint2f(
quad.topLeft.toCv(),
quad.topRight.toCv(),

View File

@@ -32,6 +32,21 @@ data class Vector3D(val x: Double, val y: Double, val z: Double) {
fun norm() = sqrt(x * x + y * y + z * z)
}
/**
 * Physical camera parameters used to express a focal length in pixel units.
 *
 * @property focalLength focal length of the lens, in millimeters
 * @property sensorWidth width of the sensor, in millimeters
 */
data class CameraIntrinsics(
    val focalLength: Float,
    val sensorWidth: Float,
) {
    /**
     * Converts the focal length to pixels for an image that is [imageWidthInPixels] wide,
     * by scaling the focal-length-to-sensor-width ratio by the image width.
     */
    fun focalLengthInPixels(imageWidthInPixels: Int): Float {
        val focalToSensorRatio = focalLength / sensorWidth
        return focalToSensorRatio * imageWidthInPixels
    }
}
/**
 * Builds a [CameraIntrinsics] from an optional focal length and sensor width, both in millimeters.
 *
 * Returns `null` when either value is missing. It also returns `null` when either value is not
 * a finite, strictly positive number: [CameraIntrinsics.focalLengthInPixels] divides the focal
 * length by the sensor width, so such values would produce NaN, infinity or a non-positive
 * focal length instead of a usable one.
 */
fun cameraIntrinsics(focalLengthInMm: Float?, sensorWidthInMm: Float?): CameraIntrinsics? {
    if (focalLengthInMm == null || sensorWidthInMm == null)
        return null
    // NaN fails both comparisons below, so it is rejected together with zero and negatives.
    val focalLengthIsUsable = focalLengthInMm.isFinite() && focalLengthInMm > 0f
    val sensorWidthIsUsable = sensorWidthInMm.isFinite() && sensorWidthInMm > 0f
    if (!focalLengthIsUsable || !sensorWidthIsUsable)
        return null
    return CameraIntrinsics(focalLengthInMm, sensorWidthInMm)
}
/**
* Estimates the true width and height of the document in the output image,
* correcting for perspective distortion using projective geometry.
@@ -44,7 +59,12 @@ data class Vector3D(val x: Double, val y: Double, val z: Double) {
* - https://www.robots.ox.ac.uk/~vgg/publications/1999/Criminisi99/criminisi99.pdf
* - https://web.stanford.edu/class/cs231a/course_notes/02-single-view-metrology.pdf
*/
fun estimateRealDimensions(quad: Quad, imageWidth: Int, imageHeight: Int): Pair<Double, Double> {
fun estimateRealDimensions(
quad: Quad,
imageWidth: Int,
imageHeight: Int,
cameraIntrinsics: CameraIntrinsics?
): Pair<Double, Double> {
fun averageSides(): Pair<Double, Double> {
val w = (norm(quad.topLeft, quad.topRight) + norm(quad.bottomLeft, quad.bottomRight)) / 2
@@ -77,14 +97,18 @@ fun estimateRealDimensions(quad: Quad, imageWidth: Int, imageHeight: Int): Pair<
val v1 = Point(v1h.x / v1h.z - cx, v1h.y / v1h.z - cy)
val v2 = Point(v2h.x / v2h.z - cx, v2h.y / v2h.z - cy)
// Focal length estimated assuming zero skew and principal point at image center.
// Under these assumptions, the Image of the Absolute Conic (IAC) simplifies,
// and orthogonal directions satisfy v1 · ω · v2 = 0,
// which reduces to: f² = -(v1x·v2x + v1y·v2y)
val f2 = -(v1.x * v2.x + v1.y * v2.y)
if (f2 <= 0)
return averageSides()
val f = sqrt(f2)
val f = if (cameraIntrinsics != null) {
cameraIntrinsics.focalLengthInPixels(max(imageWidth, imageHeight)).toDouble()
} else {
// Focal length estimated assuming zero skew and principal point at image center.
// Under these assumptions, the Image of the Absolute Conic (IAC) simplifies,
// and orthogonal directions satisfy v1 · ω · v2 = 0,
// which reduces to: f² = -(v1x·v2x + v1y·v2y)
val f2 = -(v1.x * v2.x + v1.y * v2.y)
if (f2 <= 0)
return averageSides()
sqrt(f2)
}
// Fall back when f is too large: document nearly fronto-parallel,
// vanishing points are far away, making the focal length estimate unstable.