Use focus distance to estimate physical size

This commit is contained in:
Pierre-Yves Nicolas
2026-05-23 12:22:37 +02:00
parent b89aecf369
commit 9394b19731
16 changed files with 221 additions and 81 deletions

View File

@@ -15,7 +15,6 @@
package org.fairscan.app.data
import kotlinx.serialization.Serializable
import org.fairscan.imageprocessing.CameraIntrinsics
import org.fairscan.imageprocessing.ColorMode
@Serializable
@@ -47,6 +46,9 @@ data class PageV2(
val colorMode: ColorMode? = null,
val focalLength: Float? = null,
val sensorWidth: Float? = null,
val subjectDistance: Float? = null,
val sourceWidth: Int? = null,
val sourceHeight: Int? = null,
)
@Serializable

View File

@@ -14,7 +14,7 @@
*/
package org.fairscan.app.data
import org.fairscan.app.domain.JpegProvider
import org.fairscan.app.domain.PageToExport
import java.io.File
import java.io.FileOutputStream
import java.io.OutputStream
@@ -26,7 +26,7 @@ data class GeneratedPdf(
)
fun interface PdfWriter {
suspend fun writePdfFromJpegs(jpegs: List<JpegProvider>, outputStream: OutputStream): Int
suspend fun writePdfFromJpegs(pages: List<PageToExport>, outputStream: OutputStream): Int
}
class FileManager(
@@ -43,12 +43,12 @@ class FileManager(
}
}
suspend fun generatePdf(jpegs: List<JpegProvider>): GeneratedPdf {
suspend fun generatePdf(pages: List<PageToExport>): GeneratedPdf {
pdfDir.mkdirs()
require(pdfDir.exists() && pdfDir.isDirectory) { "Invalid pdfDir: $pdfDir" }
val file = File(pdfDir, "${System.currentTimeMillis()}.pdf")
val pageCount = FileOutputStream(file).use {
pdfWriter.writePdfFromJpegs(jpegs, it)
pdfWriter.writePdfFromJpegs(pages, it)
}
val sizeBytes = file.length()
return GeneratedPdf(file, sizeBytes, pageCount)

View File

@@ -33,6 +33,8 @@ import org.fairscan.app.domain.PageViewKey
import org.fairscan.app.domain.Rotation
import org.fairscan.app.domain.ScanPage
import org.fairscan.imageprocessing.ColorMode
import org.fairscan.imageprocessing.ImageSize
import org.fairscan.imageprocessing.OpticalMeasures
import org.fairscan.imageprocessing.Point
import org.fairscan.imageprocessing.Quad
import org.fairscan.imageprocessing.cameraIntrinsics
@@ -154,8 +156,11 @@ class ImageRepository(
manualRotationDegrees = Rotation.R0.degrees,
isColored = metadata.autoColorMode == ColorMode.COLOR,
colorMode = colorMode,
focalLength = metadata.cameraIntrinsics?.focalLength,
sensorWidth = metadata.cameraIntrinsics?.sensorWidth,
focalLength = metadata.opticalMeasures?.cameraIntrinsics?.focalLength,
sensorWidth = metadata.opticalMeasures?.cameraIntrinsics?.sensorWidth,
subjectDistance = metadata.opticalMeasures?.subjectDistance,
sourceWidth = metadata.sourceSize?.width?.toInt(),
sourceHeight = metadata.sourceSize?.height?.toInt(),
)
)
saveMetadata()
@@ -402,10 +407,17 @@ fun NormalizedQuad.toQuad(): Quad =
fun PageV2.toMetadata(): PageMetadata? {
if (quad == null || isColored == null) return null
val cameraIntrinsics = cameraIntrinsics(focalLength, sensorWidth)
val sourceSize =
if (sourceWidth != null && sourceHeight != null)
ImageSize(sourceWidth, sourceHeight)
else
null
return PageMetadata(
(userQuad ?: quad).toQuad(),
Rotation.fromDegrees(baseRotationDegrees),
if (isColored) ColorMode.COLOR else ColorMode.GRAYSCALE,
cameraIntrinsics(focalLength, sensorWidth)
sourceSize,
cameraIntrinsics?.let { OpticalMeasures(it, subjectDistance) },
)
}

View File

@@ -16,26 +16,55 @@ package org.fairscan.app.domain
import org.fairscan.app.data.ImageRepository
import org.fairscan.app.platform.processedImage
import org.fairscan.imageprocessing.EstimatedDimensions
import org.fairscan.imageprocessing.estimateRealDimensions
import org.fairscan.imageprocessing.resizeForMaxPixels
import org.fairscan.imageprocessing.scaledTo
import org.opencv.core.Mat
/**
 * Supplier of a [Jpeg], obtained through a suspending call.
 *
 * Being a `fun interface`, it can be created from a lambda; the JPEG is only
 * produced when [get] is invoked.
 */
fun interface JpegProvider {
/** Produces the JPEG. */
suspend fun get(): Jpeg
}
suspend fun jpegsForExport(
/**
 * A page ready to be exported: its (optional) capture metadata together with
 * the provider of its JPEG content.
 */
data class PageToExport(
    val metadata: PageMetadata?,
    val jpeg: JpegProvider,
) {
    /**
     * Estimates the dimensions of the page from its metadata.
     *
     * The page quad is scaled to the size of the source image, passed to
     * [estimateRealDimensions] along with the optical measures, and the result
     * is adjusted for the base rotation of the page.
     *
     * @return the estimated dimensions, or `null` when the page has no metadata
     * or its metadata does not record the source image size.
     */
    fun estimatedDimensions(): EstimatedDimensions? {
        val pageMetadata = metadata ?: return null
        val sourceSize = pageMetadata.sourceSize ?: return null
        val sourceQuad = pageMetadata.normalizedQuad
            .scaledTo(1.0, 1.0, sourceSize.width, sourceSize.height)
        return estimateRealDimensions(
            sourceQuad,
            sourceSize.width.toInt(),
            sourceSize.height.toInt(),
            pageMetadata.opticalMeasures,
        ).applyRotation(pageMetadata.baseRotation)
    }
}
/**
 * Returns these dimensions as they are once [rotation] has been applied to the page.
 *
 * A quarter turn ([Rotation.R90] or [Rotation.R270]) exchanges width and height;
 * any other rotation leaves the dimensions untouched. Both variants are swapped,
 * so that a [EstimatedDimensions.Ratio] still describes the aspect ratio of the
 * rotated page instead of its inverse.
 */
private fun EstimatedDimensions.applyRotation(rotation: Rotation): EstimatedDimensions {
    val isQuarterTurn = rotation == Rotation.R90 || rotation == Rotation.R270
    if (!isQuarterTurn) return this
    // Exhaustive on purpose: a new variant must decide how it reacts to a rotation.
    return when (this) {
        is EstimatedDimensions.Physical -> EstimatedDimensions.Physical(heightMm, widthMm)
        is EstimatedDimensions.Ratio -> EstimatedDimensions.Ratio(height, width)
    }
}
suspend fun pagesToExport(
imageRepository: ImageRepository,
exportQuality: ExportQuality
): List<JpegProvider> {
): List<PageToExport> {
val pages = imageRepository.pages()
return when (exportQuality) {
ExportQuality.BALANCED -> pages.map {
JpegProvider { jpeg(it, imageRepository) }
PageToExport(it.metadata) { jpeg(it, imageRepository) }
}
ExportQuality.LOW -> pages.map { page ->
JpegProvider {
PageToExport(page.metadata) {
resizeJpegBytesForMaxPixels(
jpeg = jpeg(page, imageRepository),
maxPixels = exportQuality.maxPixels.toDouble(),
@@ -45,7 +74,7 @@ suspend fun jpegsForExport(
}
ExportQuality.HIGH -> pages.map { page ->
JpegProvider {
PageToExport(page.metadata) {
val source = imageRepository.source(page.id)
val metadata = page.metadata
val colorMode = page.colorMode

View File

@@ -14,15 +14,17 @@
*/
package org.fairscan.app.domain
import org.fairscan.imageprocessing.CameraIntrinsics
import org.fairscan.imageprocessing.ColorMode
import org.fairscan.imageprocessing.ImageSize
import org.fairscan.imageprocessing.OpticalMeasures
import org.fairscan.imageprocessing.Quad
data class PageMetadata(
val normalizedQuad: Quad,
val baseRotation: Rotation,
val autoColorMode: ColorMode,
val cameraIntrinsics: CameraIntrinsics?,
val sourceSize: ImageSize?,
val opticalMeasures: OpticalMeasures?,
)
data class ScanPage(

View File

@@ -22,33 +22,38 @@ import com.tom_roush.pdfbox.pdmodel.common.PDRectangle
import com.tom_roush.pdfbox.pdmodel.graphics.image.JPEGFactory
import org.fairscan.app.BuildConfig
import org.fairscan.app.data.PdfWriter
import org.fairscan.app.domain.JpegProvider
import org.fairscan.app.domain.PageToExport
import org.fairscan.imageprocessing.EstimatedDimensions
import java.io.OutputStream
import java.util.Calendar
class AndroidPdfWriter : PdfWriter {
override suspend fun writePdfFromJpegs(jpegs: List<JpegProvider>, outputStream: OutputStream): Int {
override suspend fun writePdfFromJpegs(pages: List<PageToExport>, outputStream: OutputStream): Int {
val doc = PDDocument()
doc.documentInformation.creationDate = Calendar.getInstance()
doc.documentInformation.creator = "FairScan ${BuildConfig.VERSION_NAME}"
doc.use { document ->
for (jpegBytes in jpegs) {
val image = JPEGFactory.createFromByteArray(document, jpegBytes.get().bytes)
for (page in pages) {
val image = JPEGFactory.createFromByteArray(document, page.jpeg.get().bytes)
// Let's say that the physical dimensions of the page are close to US Letter
// US Letter: 215.9×279.4 mm (A4: 210×297 mm)
val maxDimInMm = 279.4f
// PDF has 72 points (units) per inch, 1 inch = 25.4 mm
val pointsPerMm = 72f / 25.4f
val widthPx = image.width.toFloat()
val heightPx = image.height.toFloat()
val maxPx = maxOf(widthPx, heightPx)
val scalePxToMm = maxDimInMm / maxPx
val widthPoints = widthPx * scalePxToMm * pointsPerMm
val heightPoints = heightPx * scalePxToMm * pointsPerMm
val dimensions = page.estimatedDimensions()
val (widthPoints, heightPoints) = when (dimensions) {
is EstimatedDimensions.Physical -> {
dimensions.widthMm.toFloat() * pointsPerMm to dimensions.heightMm.toFloat() * pointsPerMm
}
else -> {
// No physical dimensions available: approximate using US Letter max dimension
val maxDimInMm = 279.4f
val scalePxToMm = maxDimInMm / maxOf(widthPx, heightPx)
widthPx * scalePxToMm * pointsPerMm to heightPx * scalePxToMm * pointsPerMm
}
}
val page = PDPage(PDRectangle(widthPoints, heightPoints))
document.addPage(page)

View File

@@ -25,9 +25,10 @@ import org.fairscan.app.domain.Jpeg
import org.fairscan.app.domain.PageMetadata
import org.fairscan.app.domain.Rotation
import org.fairscan.app.ui.screens.settings.DefaultColorMode
import org.fairscan.imageprocessing.CameraIntrinsics
import org.fairscan.imageprocessing.ColorMode
import org.fairscan.imageprocessing.ImageSize
import org.fairscan.imageprocessing.Mask
import org.fairscan.imageprocessing.OpticalMeasures
import org.fairscan.imageprocessing.Point
import org.fairscan.imageprocessing.Quad
import org.fairscan.imageprocessing.autoColorMode
@@ -102,7 +103,7 @@ fun processedImage(
sourceMat = source.toMat()
val quad = metadata.normalizedQuad.scaledTo(1, 1, sourceMat.width(), sourceMat.height())
page = extractDocument(sourceMat, quad, rotationDegrees, colorMode, exportQuality.maxPixels,
metadata.cameraIntrinsics)
metadata.opticalMeasures)
return Jpeg.fromMat(page, exportQuality.jpegQuality)
} finally {
sourceMat?.release()
@@ -117,7 +118,7 @@ fun extractDocumentFromBitmap(
mask: Mask?,
viewModelScope: CoroutineScope,
defaultColorMode: DefaultColorMode = DefaultColorMode.AUTO,
cameraIntrinsics: CameraIntrinsics?,
opticalMeasures: OpticalMeasures?,
): CapturedPage {
val exportQuality = ExportQuality.BALANCED
var colorMode = ColorMode.COLOR
@@ -144,7 +145,7 @@ fun extractDocumentFromBitmap(
autoColorMode = autoColorMode(bgr, mask, quad)
colorMode = defaultColorMode.colorMode ?: autoColorMode
page = extractDocument(bgr, quad, rotationDegrees, colorMode, exportQuality.maxPixels,
cameraIntrinsics)
opticalMeasures)
}
val pageJpeg = Jpeg.fromMat(page, exportQuality.jpegQuality)
@@ -152,7 +153,9 @@ fun extractDocumentFromBitmap(
page.release()
val baseRotation = Rotation.fromDegrees(rotationDegrees)
val metadata = PageMetadata(normalizedQuad, baseRotation, autoColorMode, cameraIntrinsics)
val sourceSize = ImageSize(source.width, source.height)
val metadata =
PageMetadata(normalizedQuad, baseRotation, autoColorMode, sourceSize, opticalMeasures)
val sourceJpegDeferred = viewModelScope.async(Dispatchers.IO) {
compressSource(source)
}

View File

@@ -15,12 +15,18 @@
package org.fairscan.app.ui.screens.camera
import android.graphics.Bitmap
import android.hardware.camera2.CameraCaptureSession
import android.hardware.camera2.CameraMetadata
import android.hardware.camera2.CaptureRequest
import android.hardware.camera2.CaptureResult
import android.hardware.camera2.TotalCaptureResult
import android.util.Log
import android.util.Size
import android.view.ViewGroup.LayoutParams.MATCH_PARENT
import android.widget.LinearLayout
import androidx.annotation.OptIn
import androidx.camera.camera2.interop.Camera2CameraInfo
import androidx.camera.camera2.interop.Camera2Interop
import androidx.camera.camera2.interop.ExperimentalCamera2Interop
import androidx.camera.core.CameraControl
import androidx.camera.core.CameraSelector
@@ -69,6 +75,7 @@ import androidx.lifecycle.LifecycleOwner
import androidx.lifecycle.compose.LocalLifecycleOwner
import org.fairscan.app.ui.components.CameraPermissionState
import org.fairscan.imageprocessing.CameraIntrinsics
import org.fairscan.imageprocessing.OpticalMeasures
import org.fairscan.imageprocessing.Point
import org.fairscan.imageprocessing.Quad
import org.fairscan.imageprocessing.cameraIntrinsics
@@ -193,7 +200,7 @@ fun bindCameraUseCases(
.setOutputImageFormat(ImageAnalysis.OUTPUT_IMAGE_FORMAT_RGBA_8888).build()
imageAnalysis.setAnalyzer(executor, onImageAnalyzed)
val imageCapture = ImageCapture.Builder()
val imageCaptureBuilder = ImageCapture.Builder()
.setResolutionSelector(
ResolutionSelector.Builder()
.setResolutionStrategy(
@@ -208,7 +215,21 @@ fun bindCameraUseCases(
.build()
)
.setCaptureMode(ImageCapture.CAPTURE_MODE_MINIMIZE_LATENCY)
.build()
Camera2Interop.Extender(imageCaptureBuilder)
.setSessionCaptureCallback(object : CameraCaptureSession.CaptureCallback() {
override fun onCaptureCompleted(
session: CameraCaptureSession,
request: CaptureRequest,
result: TotalCaptureResult
) {
result.get(CaptureResult.LENS_FOCUS_DISTANCE)?.let {
captureController.lastFocusDistanceDiopters = it
}
}
})
val imageCapture = imageCaptureBuilder.build()
captureController.imageCapture = imageCapture
val camera = cameraProvider.bindToLifecycle(lifecycleOwner, cameraSelector,
@@ -296,21 +317,34 @@ class CameraCaptureController {
private val executor = Executors.newSingleThreadExecutor()
var previewView: PreviewView? = null
var cameraIntrinsics: CameraIntrinsics? = null
var canUseFocusDistance = false
@Volatile
var lastFocusDistanceDiopters: Float? = null
fun shutdown() {
executor.shutdown()
}
fun takePicture(onImageCaptured: (ImageProxy?, CameraIntrinsics?) -> Unit) {
fun takePicture(onImageCaptured: (ImageProxy?, OpticalMeasures?) -> Unit) {
imageCapture?.takePicture(
executor,
object : ImageCapture.OnImageCapturedCallback() {
override fun onCaptureSuccess(imageProxy: ImageProxy) {
onImageCaptured(imageProxy, cameraIntrinsics)
val diopters = lastFocusDistanceDiopters
val subjectDistanceInMm =
if (canUseFocusDistance && diopters != null && diopters != 0.0f) {
1000 / diopters
} else {
null
}
onImageCaptured(
imageProxy,
cameraIntrinsics?.let { OpticalMeasures(it, subjectDistanceInMm) })
}
override fun onError(exception: ImageCaptureException) {
Log.e("CameraCapture", "Image capture failed: ${exception.message}", exception)
onImageCaptured(null, cameraIntrinsics)
onImageCaptured(null, null)
}
}
)
@@ -344,6 +378,12 @@ class CameraCaptureController {
} else {
cameraIntrinsics(focalLengths[0], max(sensorSize.width, sensorSize.height))
}
val calibration = cameraInfo.getCameraCharacteristic(
android.hardware.camera2.CameraCharacteristics.LENS_INFO_FOCUS_DISTANCE_CALIBRATION
)
canUseFocusDistance =
calibration == CameraMetadata.LENS_INFO_FOCUS_DISTANCE_CALIBRATION_CALIBRATED
|| calibration == CameraMetadata.LENS_INFO_FOCUS_DISTANCE_CALIBRATION_APPROXIMATE
}
}

View File

@@ -229,8 +229,8 @@ fun CameraScreen(
Log.i("FairScan", "Pressed <Capture>")
cameraViewModel.onCapturePressed(it)
captureController.takePicture(
onImageCaptured = { imageProxy, cameraCharacteristics ->
cameraViewModel.onImageCaptured(imageProxy, cameraCharacteristics) }
onImageCaptured = { imageProxy, opticalMeasures ->
cameraViewModel.onImageCaptured(imageProxy, opticalMeasures) }
)
}
},
@@ -648,7 +648,7 @@ fun CameraScreenPreviewWithProcessedImage() {
CapturedPage(
debugImage("gallica.bnf.fr-bpt6k5530456s-1.jpg"),
CompletableDeferred(Jpeg(ByteArray(0))),
PageMetadata(quad, R0, ColorMode.COLOR, null),
PageMetadata(quad, R0, ColorMode.COLOR, null, null),
ColorMode.COLOR)))
}

View File

@@ -36,6 +36,7 @@ import org.fairscan.app.domain.CapturedPage
import org.fairscan.app.platform.extractDocumentFromBitmap
import org.fairscan.imageprocessing.CameraIntrinsics
import org.fairscan.imageprocessing.ImageSize
import org.fairscan.imageprocessing.OpticalMeasures
import org.fairscan.imageprocessing.detectDocumentQuad
import java.util.concurrent.CancellationException
@@ -134,13 +135,13 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
}
}
fun onImageCaptured(imageProxy: ImageProxy?, cameraIntrinsics: CameraIntrinsics?) {
fun onImageCaptured(imageProxy: ImageProxy?, opticalMeasures: OpticalMeasures?) {
if (imageProxy != null) {
viewModelScope.launch {
try {
val source = imageProxy.toBitmap()
val rotationDegrees = imageProxy.imageInfo.rotationDegrees
val page = processCapturedImage(source, rotationDegrees, cameraIntrinsics)
val page = processCapturedImage(source, rotationDegrees, opticalMeasures)
imageProxy.close()
onCaptureProcessed(page)
} catch (e: RuntimeException) {
@@ -156,7 +157,7 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
private suspend fun processCapturedImage(
source: Bitmap,
rotationDegrees: Int,
cameraIntrinsics: CameraIntrinsics?,
opticalMeasures: OpticalMeasures?,
): CapturedPage = withContext(Dispatchers.IO) {
val segmentation = imageSegmentationService.runSegmentationAndReturn(source)
val mask = segmentation?.segmentation
@@ -164,7 +165,7 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
val quad = mask?.let { detectDocumentQuad(mask, originalSize, isLiveAnalysis = false) }
val defaultColorMode = settingsRepository.defaultColorMode.first()
val result = extractDocumentFromBitmap(
source, quad, rotationDegrees, mask, viewModelScope, defaultColorMode, cameraIntrinsics)
source, quad, rotationDegrees, mask, viewModelScope, defaultColorMode, opticalMeasures)
return@withContext result
}

View File

@@ -46,7 +46,7 @@ import org.fairscan.app.data.FileManager
import org.fairscan.app.data.ImageRepository
import org.fairscan.app.domain.ExportQuality
import org.fairscan.app.domain.PageViewKey
import org.fairscan.app.domain.jpegsForExport
import org.fairscan.app.domain.pagesToExport
import org.fairscan.app.ui.screens.settings.ExportFormat
import java.io.File
import java.io.FileInputStream
@@ -76,8 +76,8 @@ class ExportViewModel(container: AppContainer, val imageRepository: ImageReposit
private suspend fun generatePdf(
exportQuality: ExportQuality
): ExportResult.Pdf = withContext(Dispatchers.IO) {
val jpegs = jpegsForExport(imageRepository, exportQuality)
val pdf = fileManager.generatePdf(jpegs)
val pageToExports = pagesToExport(imageRepository, exportQuality)
val pdf = fileManager.generatePdf(pageToExports)
return@withContext ExportResult.Pdf(pdf.file, pdf.sizeInBytes, pdf.pageCount)
}
@@ -181,12 +181,12 @@ class ExportViewModel(container: AppContainer, val imageRepository: ImageReposit
private suspend fun generateJpegs(
exportQuality: ExportQuality
): ExportResult.Jpeg = withContext(Dispatchers.IO) {
val jpegs = jpegsForExport(imageRepository, exportQuality)
val pageToExports = pagesToExport(imageRepository, exportQuality)
val timestamp = System.currentTimeMillis()
preparationDir.mkdirs()
val files = jpegs.mapIndexed { index, jpeg ->
val files = pageToExports.mapIndexed { index, page ->
val file = File(preparationDir, "$timestamp-${index + 1}.jpg")
file.writeBytes(jpeg.get().bytes)
file.writeBytes(page.jpeg.get().bytes)
file
}.toList()
val sizeInBytes = files.sumOf { it.length() }

View File

@@ -17,7 +17,7 @@ package org.fairscan.app.data
import kotlinx.coroutines.test.runTest
import org.assertj.core.api.Assertions.assertThat
import org.fairscan.app.domain.Jpeg
import org.fairscan.app.domain.JpegProvider
import org.fairscan.app.domain.PageToExport
import org.junit.Test
import java.io.File
import java.io.OutputStream
@@ -73,15 +73,16 @@ class FileManagerTest {
@Test
fun generatePdf() = runTest {
val fakePdfWriter = object : PdfWriter {
override suspend fun writePdfFromJpegs(jpegs: List<JpegProvider>, outputStream: OutputStream): Int {
val list = jpegs.toList()
list.forEach { bytes -> outputStream.write(bytes.get().bytes) }
override suspend fun writePdfFromJpegs(pages: List<PageToExport>, outputStream: OutputStream): Int {
val list = pages.toList()
list.forEach { page -> outputStream.write(page.jpeg.get().bytes) }
return list.size
}
}
val manager = FileManager(pdfDir, externalDir, fakePdfWriter)
val jpegs = listOf(byteArrayOf(0x01, 0x02), byteArrayOf(0x11)).map { JpegProvider { Jpeg(it) } }
val pdf = manager.generatePdf(jpegs)
val pages = listOf(byteArrayOf(0x01, 0x02), byteArrayOf(0x11))
.map { PageToExport(null) { Jpeg(it) } }
val pdf = manager.generatePdf(pages)
assertThat(pdf.pageCount).isEqualTo(2)
assertThat(pdf.sizeInBytes).isEqualTo(3)
assertThat(pdf.file.readBytes()).isEqualTo(byteArrayOf(0x01, 0x02, 0x11))

View File

@@ -26,7 +26,6 @@ import org.assertj.core.api.Assertions.assertThat
import org.fairscan.app.domain.Jpeg
import org.fairscan.app.domain.PageMetadata
import org.fairscan.app.domain.PageViewKey
import org.fairscan.app.domain.Rotation
import org.fairscan.app.domain.Rotation.R0
import org.fairscan.app.domain.Rotation.R180
import org.fairscan.app.domain.Rotation.R270
@@ -35,6 +34,8 @@ import org.fairscan.imageprocessing.CameraIntrinsics
import org.fairscan.imageprocessing.ColorMode
import org.fairscan.imageprocessing.ColorMode.COLOR
import org.fairscan.imageprocessing.ColorMode.GRAYSCALE
import org.fairscan.imageprocessing.ImageSize
import org.fairscan.imageprocessing.OpticalMeasures
import org.fairscan.imageprocessing.Point
import org.fairscan.imageprocessing.Quad
import org.junit.Rule
@@ -52,8 +53,9 @@ class ImageRepositoryTest {
private val testScope = TestScope()
val quad1 = Quad(Point(.01, .02), Point(.1, .03), Point(.11, .12), Point(.03, .09))
val intrinsics = CameraIntrinsics(42.0f, 43.0f)
val metadata1 = PageMetadata(quad1, R90, COLOR, intrinsics)
val opticalMeasures = OpticalMeasures(CameraIntrinsics(42.0f, 43.0f), 44.0f)
val sourceSize = ImageSize(1600, 1200)
val metadata1 = PageMetadata(quad1, R90, COLOR, sourceSize, opticalMeasures)
fun getFilesDir(): File {
if (_filesDir == null) {

View File

@@ -9,7 +9,7 @@ espressoCore = "3.7.0"
lifecycleRuntimeKtx = "2.10.0"
activityCompose = "1.13.0"
composeBom = "2026.03.00"
camerax = "1.5.3"
camerax = "1.6.1"
datastore = "1.2.1"
documentfile = "1.1.0"
litert = "1.4.1"

View File

@@ -25,6 +25,7 @@ import org.opencv.core.MatOfPoint2f
import org.opencv.core.Size
import org.opencv.imgproc.Imgproc
import kotlin.math.abs
import kotlin.math.sqrt
interface Mask {
val width: Int
@@ -156,14 +157,15 @@ fun extractDocument(
rotationDegrees: Int,
colorMode: ColorMode,
maxPixels: Long,
cameraIntrinsics: CameraIntrinsics? = null,
opticalMeasures: OpticalMeasures? = null,
): Mat {
val (targetWidth, targetHeight) = estimateRealDimensions(
val estimatedDimensions = estimateRealDimensions(
quad,
inputMat.cols(),
inputMat.rows(),
cameraIntrinsics
opticalMeasures,
)
val (targetWidth, targetHeight) = estimatedDimensions.toPixelDimensions(quad)
val srcPoints = MatOfPoint2f(
quad.topLeft.toCv(),
quad.topRight.toCv(),
@@ -193,6 +195,17 @@ fun extractDocument(
return rotated
}
/**
 * Converts these estimated dimensions into output pixel dimensions for [quad].
 *
 * The output keeps the area covered by the quad (product of its mean horizontal
 * and mean vertical side lengths) while taking its height/width ratio from
 * [EstimatedDimensions.aspectRatio].
 *
 * @return a pair of (width, height)
 */
fun EstimatedDimensions.toPixelDimensions(quad: Quad): Pair<Double, Double> {
    // Mean length of the two "horizontal" sides, then of the two "vertical" sides
    val meanHorizontal =
        (norm(quad.topLeft, quad.topRight) + norm(quad.bottomLeft, quad.bottomRight)) / 2
    val meanVertical =
        (norm(quad.topLeft, quad.bottomLeft) + norm(quad.topRight, quad.bottomRight)) / 2
    val heightOverWidth = aspectRatio
    // width * height == area and height / width == ratio  =>  width = sqrt(area / ratio)
    val outputWidth = sqrt(meanHorizontal * meanVertical / heightOverWidth)
    return outputWidth to outputWidth * heightOverWidth
}
fun rotate(input: Mat, degrees: Int): Mat {
val output = Mat()
when ((degrees % 360 + 360) % 360) {

View File

@@ -47,6 +47,24 @@ fun cameraIntrinsics(focalLengthInMm: Float?, sensorWidthInMm: Float?): CameraIn
return CameraIntrinsics(focalLengthInMm, sensorWidthInMm)
}
/**
 * Optical data attached to a captured image: the intrinsics of the camera and,
 * when it is known, the distance to the subject.
 */
data class OpticalMeasures(
val cameraIntrinsics: CameraIntrinsics,
// Distance to the subject in millimeters; null when it is not available
val subjectDistance: Float?,
)
/**
 * Estimated dimensions of a document, either in real-world units or as a
 * ratio-only measurement.
 */
sealed class EstimatedDimensions {

    /** Height divided by width, whichever variant this is. */
    val aspectRatio: Double
        get() = when (this) {
            is Ratio -> height / width
            is Physical -> heightMm / widthMm
        }

    /** Dimensions in millimeters, when the subject distance is available. */
    data class Physical(val widthMm: Double, val heightMm: Double) : EstimatedDimensions()

    /** Dimensions in arbitrary units: only the ratio between them is meaningful. */
    data class Ratio(val width: Double, val height: Double) : EstimatedDimensions()
}
/**
* Estimates the true width and height of the document in the output image,
* correcting for perspective distortion using projective geometry.
@@ -63,13 +81,13 @@ fun estimateRealDimensions(
quad: Quad,
imageWidth: Int,
imageHeight: Int,
cameraIntrinsics: CameraIntrinsics?
): Pair<Double, Double> {
opticalMeasures: OpticalMeasures?,
): EstimatedDimensions {
fun averageSides(): Pair<Double, Double> {
fun averageSides(): EstimatedDimensions.Ratio {
val w = (norm(quad.topLeft, quad.topRight) + norm(quad.bottomLeft, quad.bottomRight)) / 2
val h = (norm(quad.topLeft, quad.bottomLeft) + norm(quad.topRight, quad.bottomRight)) / 2
return Pair(w, h)
return EstimatedDimensions.Ratio(w, h)
}
// Homogeneous 2D point
@@ -97,8 +115,9 @@ fun estimateRealDimensions(
val v1 = Point(v1h.x / v1h.z - cx, v1h.y / v1h.z - cy)
val v2 = Point(v2h.x / v2h.z - cx, v2h.y / v2h.z - cy)
val f = if (cameraIntrinsics != null) {
cameraIntrinsics.focalLengthInPixels(max(imageWidth, imageHeight)).toDouble()
val f = if (opticalMeasures != null) {
opticalMeasures.cameraIntrinsics
.focalLengthInPixels(max(imageWidth, imageHeight)).toDouble()
} else {
// Focal length estimated assuming zero skew and principal point at image center.
// Under these assumptions, the Image of the Absolute Conic (IAC) simplifies,
@@ -131,26 +150,37 @@ fun estimateRealDimensions(
// Camera ray through a corner: K⁻¹ · (u, v, 1)
fun ray(p: Point) = Vector3D((p.x - cx) / f, (p.y - cy) / f, 1.0)
// Intersect ray with document plane: X = t·r where t = 1 / (n·r)
// We assume an arbitrary plane distance (d = 1). Absolute scale is wrong,
// but cancels out when computing length ratios.
// Scale factor: either from subject distance, or arbitrary (ratio only)
val subjectDistance = opticalMeasures?.subjectDistance?.toDouble()
val scale: Double? = if (subjectDistance != null) {
// Project subject distance onto the plane normal to get perpendicular distance
val centerX = (quad.topLeft.x + quad.topRight.x + quad.bottomLeft.x + quad.bottomRight.x) / 4.0
val centerY = (quad.topLeft.y + quad.topRight.y + quad.bottomLeft.y + quad.bottomRight.y) / 4.0
val centerRay = ray(Point(centerX, centerY)).let { it * (1.0 / it.norm()) }
val cosAngle = centerRay.dotProduct(n).absoluteValue
if (cosAngle < 0.1) null // document too tilted, unreliable
else subjectDistance * cosAngle
} else null
// Intersect ray with document plane: X = t·r where t = d / (n·r)
// When subjectDistance is unavailable, we assume an arbitrary plane distance (d = 1): absolute
// scale is wrong, but cancels out when computing length ratios.
fun corner3D(p: Point): Vector3D {
val r = ray(p)
return r * (1.0 / n.dotProduct(r))
val t = if (scale != null) scale / n.dotProduct(r) else 1.0 / n.dotProduct(r)
return r * t
}
val xTL = corner3D(quad.topLeft); val xTR = corner3D(quad.topRight)
val xBR = corner3D(quad.bottomRight); val xBL = corner3D(quad.bottomLeft)
// Side lengths in reconstructed 3D space (up to an unknown global scale)
// Side lengths in reconstructed 3D space
val realW = ((xTR - xTL).norm() + (xBR - xBL).norm()) / 2
val realH = ((xBL - xTL).norm() + (xBR - xTR).norm()) / 2
// Output dimensions: preserve projected area, apply corrected aspect ratio
val ratio = realH / realW
val (projW, projH) = averageSides()
val targetWidth = sqrt(projW * projH / ratio)
val targetHeight = targetWidth * ratio
return Pair(targetWidth, targetHeight)
return if (opticalMeasures != null && scale != null) {
EstimatedDimensions.Physical(realW, realH)
} else {
EstimatedDimensions.Ratio(realW, realH)
}
}