Improve perspective correction with camera intrinsics (#182)

This commit is contained in:
Pierre-Yves Nicolas
2026-05-20 20:33:03 +02:00
committed by GitHub
parent d9844be4de
commit 27ad5efeff
12 changed files with 114 additions and 48 deletions

View File

@@ -15,6 +15,7 @@
package org.fairscan.app.data package org.fairscan.app.data
import kotlinx.serialization.Serializable import kotlinx.serialization.Serializable
import org.fairscan.imageprocessing.CameraIntrinsics
import org.fairscan.imageprocessing.ColorMode import org.fairscan.imageprocessing.ColorMode
@Serializable @Serializable
@@ -44,6 +45,8 @@ data class PageV2(
val userQuad: NormalizedQuad? = null, val userQuad: NormalizedQuad? = null,
val isColored: Boolean? = null, val isColored: Boolean? = null,
val colorMode: ColorMode? = null, val colorMode: ColorMode? = null,
val focalLength: Float? = null,
val sensorWidth: Float? = null,
) )
@Serializable @Serializable

View File

@@ -35,6 +35,7 @@ import org.fairscan.app.domain.ScanPage
import org.fairscan.imageprocessing.ColorMode import org.fairscan.imageprocessing.ColorMode
import org.fairscan.imageprocessing.Point import org.fairscan.imageprocessing.Point
import org.fairscan.imageprocessing.Quad import org.fairscan.imageprocessing.Quad
import org.fairscan.imageprocessing.cameraIntrinsics
import java.io.File import java.io.File
import java.util.Collections.synchronizedMap import java.util.Collections.synchronizedMap
@@ -153,6 +154,8 @@ class ImageRepository(
manualRotationDegrees = Rotation.R0.degrees, manualRotationDegrees = Rotation.R0.degrees,
isColored = metadata.autoColorMode == ColorMode.COLOR, isColored = metadata.autoColorMode == ColorMode.COLOR,
colorMode = colorMode, colorMode = colorMode,
focalLength = metadata.cameraIntrinsics?.focalLength,
sensorWidth = metadata.cameraIntrinsics?.sensorWidth,
) )
) )
saveMetadata() saveMetadata()
@@ -215,8 +218,7 @@ class ImageRepository(
val processedJpeg = val processedJpeg =
transformations.process( transformations.process(
sourceJpeg, sourceJpeg,
normalizedQuad = update.normalizedQuad, metadata = metadata.copy(normalizedQuad = update.normalizedQuad),
baseRotation = metadata.baseRotation,
colorMode = update.colorMode colorMode = update.colorMode
) )
processedFile.writeBytes(processedJpeg.bytes) processedFile.writeBytes(processedJpeg.bytes)
@@ -403,6 +405,7 @@ fun PageV2.toMetadata(): PageMetadata? {
return PageMetadata( return PageMetadata(
(userQuad ?: quad).toQuad(), (userQuad ?: quad).toQuad(),
Rotation.fromDegrees(baseRotationDegrees), Rotation.fromDegrees(baseRotationDegrees),
if (isColored) ColorMode.COLOR else ColorMode.GRAYSCALE if (isColored) ColorMode.COLOR else ColorMode.GRAYSCALE,
cameraIntrinsics(focalLength, sensorWidth)
) )
} }

View File

@@ -15,9 +15,8 @@
package org.fairscan.app.data package org.fairscan.app.data
import org.fairscan.app.domain.Jpeg import org.fairscan.app.domain.Jpeg
import org.fairscan.app.domain.Rotation import org.fairscan.app.domain.PageMetadata
import org.fairscan.imageprocessing.ColorMode import org.fairscan.imageprocessing.ColorMode
import org.fairscan.imageprocessing.Quad
interface ImageTransformations { interface ImageTransformations {
@@ -27,8 +26,7 @@ interface ImageTransformations {
fun process( fun process(
source: Jpeg, source: Jpeg,
normalizedQuad: Quad, metadata: PageMetadata,
baseRotation: Rotation,
colorMode: ColorMode colorMode: ColorMode
): Jpeg ): Jpeg

View File

@@ -51,8 +51,7 @@ suspend fun jpegsForExport(
val colorMode = page.colorMode val colorMode = page.colorMode
if (source != null && metadata != null && colorMode != null) { if (source != null && metadata != null && colorMode != null) {
val rotation = page.totalRotation() val rotation = page.totalRotation()
val normalizedQuad = metadata.normalizedQuad processedImage(source, metadata, rotation, colorMode, exportQuality)
processedImage(source, normalizedQuad, rotation, colorMode, exportQuality)
} }
else else
jpeg(page, imageRepository) jpeg(page, imageRepository)

View File

@@ -14,6 +14,7 @@
*/ */
package org.fairscan.app.domain package org.fairscan.app.domain
import org.fairscan.imageprocessing.CameraIntrinsics
import org.fairscan.imageprocessing.ColorMode import org.fairscan.imageprocessing.ColorMode
import org.fairscan.imageprocessing.Quad import org.fairscan.imageprocessing.Quad
@@ -21,6 +22,7 @@ data class PageMetadata(
val normalizedQuad: Quad, val normalizedQuad: Quad,
val baseRotation: Rotation, val baseRotation: Rotation,
val autoColorMode: ColorMode, val autoColorMode: ColorMode,
val cameraIntrinsics: CameraIntrinsics?,
) )
data class ScanPage( data class ScanPage(

View File

@@ -25,6 +25,7 @@ import org.fairscan.app.domain.Jpeg
import org.fairscan.app.domain.PageMetadata import org.fairscan.app.domain.PageMetadata
import org.fairscan.app.domain.Rotation import org.fairscan.app.domain.Rotation
import org.fairscan.app.ui.screens.settings.DefaultColorMode import org.fairscan.app.ui.screens.settings.DefaultColorMode
import org.fairscan.imageprocessing.CameraIntrinsics
import org.fairscan.imageprocessing.ColorMode import org.fairscan.imageprocessing.ColorMode
import org.fairscan.imageprocessing.Mask import org.fairscan.imageprocessing.Mask
import org.fairscan.imageprocessing.Point import org.fairscan.imageprocessing.Point
@@ -79,17 +80,17 @@ class ImageProcessor(private val thumbnailSizePx: Int) : ImageTransformations {
override fun process( override fun process(
source: Jpeg, source: Jpeg,
normalizedQuad: Quad, metadata: PageMetadata,
baseRotation: Rotation,
colorMode: ColorMode colorMode: ColorMode
): Jpeg { ): Jpeg {
return processedImage(source, normalizedQuad, baseRotation, colorMode, ExportQuality.BALANCED) val baseRotation = metadata.baseRotation
return processedImage(source, metadata, baseRotation, colorMode, ExportQuality.BALANCED)
} }
} }
fun processedImage( fun processedImage(
source: Jpeg, source: Jpeg,
normalizedQuad: Quad, metadata: PageMetadata,
rotation: Rotation, rotation: Rotation,
colorMode: ColorMode, colorMode: ColorMode,
exportQuality: ExportQuality, exportQuality: ExportQuality,
@@ -99,8 +100,9 @@ fun processedImage(
var page: Mat? = null var page: Mat? = null
try { try {
sourceMat = source.toMat() sourceMat = source.toMat()
val quad = normalizedQuad.scaledTo(1, 1, sourceMat.width(), sourceMat.height()) val quad = metadata.normalizedQuad.scaledTo(1, 1, sourceMat.width(), sourceMat.height())
page = extractDocument(sourceMat, quad, rotationDegrees, colorMode, exportQuality.maxPixels) page = extractDocument(sourceMat, quad, rotationDegrees, colorMode, exportQuality.maxPixels,
metadata.cameraIntrinsics)
return Jpeg.fromMat(page, exportQuality.jpegQuality) return Jpeg.fromMat(page, exportQuality.jpegQuality)
} finally { } finally {
sourceMat?.release() sourceMat?.release()
@@ -114,7 +116,8 @@ fun extractDocumentFromBitmap(
rotationDegrees: Int, rotationDegrees: Int,
mask: Mask?, mask: Mask?,
viewModelScope: CoroutineScope, viewModelScope: CoroutineScope,
defaultColorMode: DefaultColorMode = DefaultColorMode.AUTO defaultColorMode: DefaultColorMode = DefaultColorMode.AUTO,
cameraIntrinsics: CameraIntrinsics?,
): CapturedPage { ): CapturedPage {
val exportQuality = ExportQuality.BALANCED val exportQuality = ExportQuality.BALANCED
var colorMode = ColorMode.COLOR var colorMode = ColorMode.COLOR
@@ -140,7 +143,8 @@ fun extractDocumentFromBitmap(
normalizedQuad = quad.scaledTo(source.width, source.height, 1, 1) normalizedQuad = quad.scaledTo(source.width, source.height, 1, 1)
autoColorMode = autoColorMode(bgr, mask, quad) autoColorMode = autoColorMode(bgr, mask, quad)
colorMode = defaultColorMode.colorMode ?: autoColorMode colorMode = defaultColorMode.colorMode ?: autoColorMode
page = extractDocument(bgr, quad, rotationDegrees, colorMode, exportQuality.maxPixels) page = extractDocument(bgr, quad, rotationDegrees, colorMode, exportQuality.maxPixels,
cameraIntrinsics)
} }
val pageJpeg = Jpeg.fromMat(page, exportQuality.jpegQuality) val pageJpeg = Jpeg.fromMat(page, exportQuality.jpegQuality)
@@ -148,7 +152,7 @@ fun extractDocumentFromBitmap(
page.release() page.release()
val baseRotation = Rotation.fromDegrees(rotationDegrees) val baseRotation = Rotation.fromDegrees(rotationDegrees)
val metadata = PageMetadata(normalizedQuad, baseRotation, autoColorMode) val metadata = PageMetadata(normalizedQuad, baseRotation, autoColorMode, cameraIntrinsics)
val sourceJpegDeferred = viewModelScope.async(Dispatchers.IO) { val sourceJpegDeferred = viewModelScope.async(Dispatchers.IO) {
compressSource(source) compressSource(source)
} }

View File

@@ -19,6 +19,9 @@ import android.util.Log
import android.util.Size import android.util.Size
import android.view.ViewGroup.LayoutParams.MATCH_PARENT import android.view.ViewGroup.LayoutParams.MATCH_PARENT
import android.widget.LinearLayout import android.widget.LinearLayout
import androidx.annotation.OptIn
import androidx.camera.camera2.interop.Camera2CameraInfo
import androidx.camera.camera2.interop.ExperimentalCamera2Interop
import androidx.camera.core.CameraControl import androidx.camera.core.CameraControl
import androidx.camera.core.CameraSelector import androidx.camera.core.CameraSelector
import androidx.camera.core.FocusMeteringAction import androidx.camera.core.FocusMeteringAction
@@ -65,12 +68,15 @@ import androidx.core.graphics.scale
import androidx.lifecycle.LifecycleOwner import androidx.lifecycle.LifecycleOwner
import androidx.lifecycle.compose.LocalLifecycleOwner import androidx.lifecycle.compose.LocalLifecycleOwner
import org.fairscan.app.ui.components.CameraPermissionState import org.fairscan.app.ui.components.CameraPermissionState
import org.fairscan.imageprocessing.CameraIntrinsics
import org.fairscan.imageprocessing.Point import org.fairscan.imageprocessing.Point
import org.fairscan.imageprocessing.Quad import org.fairscan.imageprocessing.Quad
import org.fairscan.imageprocessing.cameraIntrinsics
import org.fairscan.imageprocessing.scaledTo import org.fairscan.imageprocessing.scaledTo
import java.util.concurrent.ExecutorService import java.util.concurrent.ExecutorService
import java.util.concurrent.Executors import java.util.concurrent.Executors
import java.util.concurrent.TimeUnit import java.util.concurrent.TimeUnit
import kotlin.math.max
@Composable @Composable
fun CameraPreview( fun CameraPreview(
@@ -162,6 +168,7 @@ fun CameraPreview(
} }
@OptIn(ExperimentalCamera2Interop::class)
fun bindCameraUseCases( fun bindCameraUseCases(
lifecycleOwner: LifecycleOwner, lifecycleOwner: LifecycleOwner,
cameraProvider: ProcessCameraProvider, cameraProvider: ProcessCameraProvider,
@@ -207,6 +214,7 @@ fun bindCameraUseCases(
val camera = cameraProvider.bindToLifecycle(lifecycleOwner, cameraSelector, val camera = cameraProvider.bindToLifecycle(lifecycleOwner, cameraSelector,
imageAnalysis, preview, imageCapture) imageAnalysis, preview, imageCapture)
captureController.cameraControl = camera.cameraControl captureController.cameraControl = camera.cameraControl
captureController.setCameraCharacteristics(Camera2CameraInfo.from(camera.cameraInfo))
} }
@Composable @Composable
@@ -287,21 +295,22 @@ class CameraCaptureController {
var imageCapture: ImageCapture? = null var imageCapture: ImageCapture? = null
private val executor = Executors.newSingleThreadExecutor() private val executor = Executors.newSingleThreadExecutor()
var previewView: PreviewView? = null var previewView: PreviewView? = null
var cameraIntrinsics: CameraIntrinsics? = null
fun shutdown() { fun shutdown() {
executor.shutdown() executor.shutdown()
} }
fun takePicture(onImageCaptured: (ImageProxy?) -> Unit) { fun takePicture(onImageCaptured: (ImageProxy?, CameraIntrinsics?) -> Unit) {
imageCapture?.takePicture( imageCapture?.takePicture(
executor, executor,
object : ImageCapture.OnImageCapturedCallback() { object : ImageCapture.OnImageCapturedCallback() {
override fun onCaptureSuccess(imageProxy: ImageProxy) { override fun onCaptureSuccess(imageProxy: ImageProxy) {
onImageCaptured(imageProxy) onImageCaptured(imageProxy, cameraIntrinsics)
} }
override fun onError(exception: ImageCaptureException) { override fun onError(exception: ImageCaptureException) {
Log.e("CameraCapture", "Image capture failed: ${exception.message}", exception) Log.e("CameraCapture", "Image capture failed: ${exception.message}", exception)
onImageCaptured(null) onImageCaptured(null, cameraIntrinsics)
} }
} }
) )
@@ -320,6 +329,22 @@ class CameraCaptureController {
control.startFocusAndMetering(action) control.startFocusAndMetering(action)
} }
/**
 * Reads the lens focal lengths and the physical sensor size of the bound camera and
 * stores the resulting intrinsics in [cameraIntrinsics].
 *
 * The intrinsics are set to null when the sensor size is unavailable or when the camera
 * does not report exactly one focal length.
 * NOTE(review): presumably several focal lengths are rejected because the one actually
 * used for a capture cannot be told apart here; confirm.
 *
 * The larger of the two sensor dimensions is used as the sensor width.
 */
@OptIn(ExperimentalCamera2Interop::class)
fun setCameraCharacteristics(cameraInfo: Camera2CameraInfo) {
    val availableFocalLengths = cameraInfo.getCameraCharacteristic(
        android.hardware.camera2.CameraCharacteristics.LENS_INFO_AVAILABLE_FOCAL_LENGTHS
    )
    val physicalSensorSize = cameraInfo.getCameraCharacteristic(
        android.hardware.camera2.CameraCharacteristics.SENSOR_INFO_PHYSICAL_SIZE
    )
    // Null when the array is missing, empty, or holds more than one focal length.
    val onlyFocalLength = availableFocalLengths?.singleOrNull()
    cameraIntrinsics =
        if (onlyFocalLength != null && physicalSensorSize != null) {
            val longestSensorSide = max(physicalSensorSize.width, physicalSensorSize.height)
            cameraIntrinsics(onlyFocalLength, longestSensorSide)
        } else {
            null
        }
}
} }
sealed interface CameraBindState { sealed interface CameraBindState {

View File

@@ -229,7 +229,8 @@ fun CameraScreen(
Log.i("FairScan", "Pressed <Capture>") Log.i("FairScan", "Pressed <Capture>")
cameraViewModel.onCapturePressed(it) cameraViewModel.onCapturePressed(it)
captureController.takePicture( captureController.takePicture(
onImageCaptured = { imageProxy -> cameraViewModel.onImageCaptured(imageProxy) } onImageCaptured = { imageProxy, cameraCharacteristics ->
cameraViewModel.onImageCaptured(imageProxy, cameraCharacteristics) }
) )
} }
}, },
@@ -647,7 +648,7 @@ fun CameraScreenPreviewWithProcessedImage() {
CapturedPage( CapturedPage(
debugImage("gallica.bnf.fr-bpt6k5530456s-1.jpg"), debugImage("gallica.bnf.fr-bpt6k5530456s-1.jpg"),
CompletableDeferred(Jpeg(ByteArray(0))), CompletableDeferred(Jpeg(ByteArray(0))),
PageMetadata(quad, R0, ColorMode.COLOR), PageMetadata(quad, R0, ColorMode.COLOR, null),
ColorMode.COLOR))) ColorMode.COLOR)))
} }

View File

@@ -34,6 +34,7 @@ import kotlinx.coroutines.withContext
import org.fairscan.app.AppContainer import org.fairscan.app.AppContainer
import org.fairscan.app.domain.CapturedPage import org.fairscan.app.domain.CapturedPage
import org.fairscan.app.platform.extractDocumentFromBitmap import org.fairscan.app.platform.extractDocumentFromBitmap
import org.fairscan.imageprocessing.CameraIntrinsics
import org.fairscan.imageprocessing.ImageSize import org.fairscan.imageprocessing.ImageSize
import org.fairscan.imageprocessing.detectDocumentQuad import org.fairscan.imageprocessing.detectDocumentQuad
import java.util.concurrent.CancellationException import java.util.concurrent.CancellationException
@@ -133,12 +134,13 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
} }
} }
fun onImageCaptured(imageProxy: ImageProxy?) { fun onImageCaptured(imageProxy: ImageProxy?, cameraIntrinsics: CameraIntrinsics?) {
if (imageProxy != null) { if (imageProxy != null) {
viewModelScope.launch { viewModelScope.launch {
try { try {
val source = imageProxy.toBitmap() val source = imageProxy.toBitmap()
val page = processCapturedImage(source, imageProxy.imageInfo.rotationDegrees) val rotationDegrees = imageProxy.imageInfo.rotationDegrees
val page = processCapturedImage(source, rotationDegrees, cameraIntrinsics)
imageProxy.close() imageProxy.close()
onCaptureProcessed(page) onCaptureProcessed(page)
} catch (e: RuntimeException) { } catch (e: RuntimeException) {
@@ -154,6 +156,7 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
private suspend fun processCapturedImage( private suspend fun processCapturedImage(
source: Bitmap, source: Bitmap,
rotationDegrees: Int, rotationDegrees: Int,
cameraIntrinsics: CameraIntrinsics?,
): CapturedPage = withContext(Dispatchers.IO) { ): CapturedPage = withContext(Dispatchers.IO) {
val segmentation = imageSegmentationService.runSegmentationAndReturn(source) val segmentation = imageSegmentationService.runSegmentationAndReturn(source)
val mask = segmentation?.segmentation val mask = segmentation?.segmentation
@@ -161,7 +164,7 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
val quad = mask?.let { detectDocumentQuad(mask, originalSize, isLiveAnalysis = false) } val quad = mask?.let { detectDocumentQuad(mask, originalSize, isLiveAnalysis = false) }
val defaultColorMode = settingsRepository.defaultColorMode.first() val defaultColorMode = settingsRepository.defaultColorMode.first()
val result = extractDocumentFromBitmap( val result = extractDocumentFromBitmap(
source, quad, rotationDegrees, mask, viewModelScope, defaultColorMode) source, quad, rotationDegrees, mask, viewModelScope, defaultColorMode, cameraIntrinsics)
return@withContext result return@withContext result
} }
@@ -202,11 +205,9 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
try { try {
val photoToImport = imageLoader.load(uri) val photoToImport = imageLoader.load(uri)
ensureActive() ensureActive()
val page = processCapturedImage(photoToImport, 0) val page = processCapturedImage(photoToImport, 0, null)
ensureActive() ensureActive()
page?.let { _events.emit(CameraEvent.ImageCaptured(page))
_events.emit(CameraEvent.ImageCaptured(it))
}
} catch (e: CancellationException) { } catch (e: CancellationException) {
throw e throw e
} catch (e: Exception) { } catch (e: Exception) {

View File

@@ -31,6 +31,7 @@ import org.fairscan.app.domain.Rotation.R0
import org.fairscan.app.domain.Rotation.R180 import org.fairscan.app.domain.Rotation.R180
import org.fairscan.app.domain.Rotation.R270 import org.fairscan.app.domain.Rotation.R270
import org.fairscan.app.domain.Rotation.R90 import org.fairscan.app.domain.Rotation.R90
import org.fairscan.imageprocessing.CameraIntrinsics
import org.fairscan.imageprocessing.ColorMode import org.fairscan.imageprocessing.ColorMode
import org.fairscan.imageprocessing.ColorMode.COLOR import org.fairscan.imageprocessing.ColorMode.COLOR
import org.fairscan.imageprocessing.ColorMode.GRAYSCALE import org.fairscan.imageprocessing.ColorMode.GRAYSCALE
@@ -51,7 +52,8 @@ class ImageRepositoryTest {
private val testScope = TestScope() private val testScope = TestScope()
val quad1 = Quad(Point(.01, .02), Point(.1, .03), Point(.11, .12), Point(.03, .09)) val quad1 = Quad(Point(.01, .02), Point(.1, .03), Point(.11, .12), Point(.03, .09))
val metadata1 = PageMetadata(quad1, R90, COLOR) val intrinsics = CameraIntrinsics(42.0f, 43.0f)
val metadata1 = PageMetadata(quad1, R90, COLOR, intrinsics)
fun getFilesDir(): File { fun getFilesDir(): File {
if (_filesDir == null) { if (_filesDir == null) {
@@ -63,7 +65,7 @@ class ImageRepositoryTest {
fun repo( fun repo(
rotate: (Jpeg, Int) -> Jpeg = { input, _ -> input }, rotate: (Jpeg, Int) -> Jpeg = { input, _ -> input },
resizeToThumbnail: (Jpeg) -> Jpeg = { input -> jpeg(input.bytes[0]) }, resizeToThumbnail: (Jpeg) -> Jpeg = { input -> jpeg(input.bytes[0]) },
process: (Jpeg, Quad, Rotation, ColorMode) -> Jpeg = { _, _, _, _ -> process: (Jpeg, PageMetadata, ColorMode) -> Jpeg = { _, _, _ ->
throw UnsupportedOperationException() throw UnsupportedOperationException()
} }
): ImageRepository { ): ImageRepository {
@@ -74,10 +76,9 @@ class ImageRepositoryTest {
resizeToThumbnail(input) resizeToThumbnail(input)
override fun process( override fun process(
source: Jpeg, source: Jpeg,
normalizedQuad: Quad, metadata: PageMetadata,
baseRotation: Rotation,
colorMode: ColorMode colorMode: ColorMode
): Jpeg = process(source, normalizedQuad, baseRotation, colorMode) ): Jpeg = process(source, metadata, colorMode)
} }
return ImageRepository(getFilesDir(), transformations, testScope) return ImageRepository(getFilesDir(), transformations, testScope)
@@ -244,7 +245,7 @@ class ImageRepositoryTest {
fun setColorMode_should_process_and_update_metadata() = runTest { fun setColorMode_should_process_and_update_metadata() = runTest {
val jpeg1 = jpeg(10) val jpeg1 = jpeg(10)
val repo = repo( val repo = repo(
process = { _, _ , _, mode -> process = { _, _, mode ->
assertThat(mode).isEqualTo(GRAYSCALE) assertThat(mode).isEqualTo(GRAYSCALE)
jpeg(41) jpeg(41)
} }
@@ -262,7 +263,7 @@ class ImageRepositoryTest {
fun setColorMode_should_not_run_twice_in_parallel() = runTest { fun setColorMode_should_not_run_twice_in_parallel() = runTest {
var processCalls = 0 var processCalls = 0
val repo = repo( val repo = repo(
process = { _, _, _, _ -> process = { _, _, _ ->
processCalls++ processCalls++
runBlocking { delay(10) } runBlocking { delay(10) }
jpeg(1) jpeg(1)

View File

@@ -156,9 +156,14 @@ fun extractDocument(
rotationDegrees: Int, rotationDegrees: Int,
colorMode: ColorMode, colorMode: ColorMode,
maxPixels: Long, maxPixels: Long,
cameraIntrinsics: CameraIntrinsics? = null,
): Mat { ): Mat {
val (targetWidth, targetHeight) = estimateRealDimensions(quad, inputMat.cols(), inputMat.rows()) val (targetWidth, targetHeight) = estimateRealDimensions(
quad,
inputMat.cols(),
inputMat.rows(),
cameraIntrinsics
)
val srcPoints = MatOfPoint2f( val srcPoints = MatOfPoint2f(
quad.topLeft.toCv(), quad.topLeft.toCv(),
quad.topRight.toCv(), quad.topRight.toCv(),

View File

@@ -32,6 +32,21 @@ data class Vector3D(val x: Double, val y: Double, val z: Double) {
fun norm() = sqrt(x * x + y * y + z * z) fun norm() = sqrt(x * x + y * y + z * z)
} }
/**
 * Physical camera parameters used to express the focal length in pixel units.
 *
 * Both values are in millimeters.
 */
data class CameraIntrinsics(
    // in millimeters
    val focalLength: Float,
    // in millimeters
    val sensorWidth: Float,
) {
    /**
     * Converts the focal length from millimeters to pixels.
     *
     * @param imageWidthInPixels size in pixels of the image side that corresponds to [sensorWidth]
     * @return the focal length scaled by the ratio of image width to sensor width
     */
    fun focalLengthInPixels(imageWidthInPixels: Int) =
        focalLength / sensorWidth * imageWidthInPixels
}

/**
 * Builds a [CameraIntrinsics] from optional values, e.g. values read back from stored metadata.
 *
 * @param focalLengthInMm focal length in millimeters, or null when unknown
 * @param sensorWidthInMm sensor width in millimeters, or null when unknown
 * @return the intrinsics, or null when either value is missing or is not a finite,
 *   strictly positive number
 */
fun cameraIntrinsics(focalLengthInMm: Float?, sensorWidthInMm: Float?): CameraIntrinsics? {
    if (focalLengthInMm == null || sensorWidthInMm == null)
        return null
    // A zero, negative, NaN or infinite value would make focalLengthInPixels() return
    // Infinity, NaN or a negative number; treat such values as "intrinsics unknown".
    fun Float.isUsableLength() = isFinite() && this > 0f
    if (!focalLengthInMm.isUsableLength() || !sensorWidthInMm.isUsableLength())
        return null
    return CameraIntrinsics(focalLengthInMm, sensorWidthInMm)
}
/** /**
* Estimates the true width and height of the document in the output image, * Estimates the true width and height of the document in the output image,
* correcting for perspective distortion using projective geometry. * correcting for perspective distortion using projective geometry.
@@ -44,7 +59,12 @@ data class Vector3D(val x: Double, val y: Double, val z: Double) {
* - https://www.robots.ox.ac.uk/~vgg/publications/1999/Criminisi99/criminisi99.pdf * - https://www.robots.ox.ac.uk/~vgg/publications/1999/Criminisi99/criminisi99.pdf
* - https://web.stanford.edu/class/cs231a/course_notes/02-single-view-metrology.pdf * - https://web.stanford.edu/class/cs231a/course_notes/02-single-view-metrology.pdf
*/ */
fun estimateRealDimensions(quad: Quad, imageWidth: Int, imageHeight: Int): Pair<Double, Double> { fun estimateRealDimensions(
quad: Quad,
imageWidth: Int,
imageHeight: Int,
cameraIntrinsics: CameraIntrinsics?
): Pair<Double, Double> {
fun averageSides(): Pair<Double, Double> { fun averageSides(): Pair<Double, Double> {
val w = (norm(quad.topLeft, quad.topRight) + norm(quad.bottomLeft, quad.bottomRight)) / 2 val w = (norm(quad.topLeft, quad.topRight) + norm(quad.bottomLeft, quad.bottomRight)) / 2
@@ -77,14 +97,18 @@ fun estimateRealDimensions(quad: Quad, imageWidth: Int, imageHeight: Int): Pair<
val v1 = Point(v1h.x / v1h.z - cx, v1h.y / v1h.z - cy) val v1 = Point(v1h.x / v1h.z - cx, v1h.y / v1h.z - cy)
val v2 = Point(v2h.x / v2h.z - cx, v2h.y / v2h.z - cy) val v2 = Point(v2h.x / v2h.z - cx, v2h.y / v2h.z - cy)
// Focal length estimated assuming zero skew and principal point at image center. val f = if (cameraIntrinsics != null) {
// Under these assumptions, the Image of the Absolute Conic (IAC) simplifies, cameraIntrinsics.focalLengthInPixels(max(imageWidth, imageHeight)).toDouble()
// and orthogonal directions satisfy v1 · ω · v2 = 0, } else {
// which reduces to: f² = -(v1x·v2x + v1y·v2y) // Focal length estimated assuming zero skew and principal point at image center.
val f2 = -(v1.x * v2.x + v1.y * v2.y) // Under these assumptions, the Image of the Absolute Conic (IAC) simplifies,
if (f2 <= 0) // and orthogonal directions satisfy v1 · ω · v2 = 0,
return averageSides() // which reduces to: f² = -(v1x·v2x + v1y·v2y)
val f = sqrt(f2) val f2 = -(v1.x * v2.x + v1.y * v2.y)
if (f2 <= 0)
return averageSides()
sqrt(f2)
}
// Fall back when f is too large: document nearly fronto-parallel, // Fall back when f is too large: document nearly fronto-parallel,
// vanishing points are far away, making the focal length estimate unstable. // vanishing points are far away, making the focal length estimate unstable.