Use focus distance to estimate physical size

This commit is contained in:
Pierre-Yves Nicolas
2026-05-23 12:22:37 +02:00
parent b89aecf369
commit 9394b19731
16 changed files with 221 additions and 81 deletions

View File

@@ -15,7 +15,6 @@
package org.fairscan.app.data package org.fairscan.app.data
import kotlinx.serialization.Serializable import kotlinx.serialization.Serializable
import org.fairscan.imageprocessing.CameraIntrinsics
import org.fairscan.imageprocessing.ColorMode import org.fairscan.imageprocessing.ColorMode
@Serializable @Serializable
@@ -47,6 +46,9 @@ data class PageV2(
val colorMode: ColorMode? = null, val colorMode: ColorMode? = null,
val focalLength: Float? = null, val focalLength: Float? = null,
val sensorWidth: Float? = null, val sensorWidth: Float? = null,
val subjectDistance: Float? = null,
val sourceWidth: Int? = null,
val sourceHeight: Int? = null,
) )
@Serializable @Serializable

View File

@@ -14,7 +14,7 @@
*/ */
package org.fairscan.app.data package org.fairscan.app.data
import org.fairscan.app.domain.JpegProvider import org.fairscan.app.domain.PageToExport
import java.io.File import java.io.File
import java.io.FileOutputStream import java.io.FileOutputStream
import java.io.OutputStream import java.io.OutputStream
@@ -26,7 +26,7 @@ data class GeneratedPdf(
) )
fun interface PdfWriter { fun interface PdfWriter {
suspend fun writePdfFromJpegs(jpegs: List<JpegProvider>, outputStream: OutputStream): Int suspend fun writePdfFromJpegs(pages: List<PageToExport>, outputStream: OutputStream): Int
} }
class FileManager( class FileManager(
@@ -43,12 +43,12 @@ class FileManager(
} }
} }
suspend fun generatePdf(jpegs: List<JpegProvider>): GeneratedPdf { suspend fun generatePdf(pages: List<PageToExport>): GeneratedPdf {
pdfDir.mkdirs() pdfDir.mkdirs()
require(pdfDir.exists() && pdfDir.isDirectory) { "Invalid pdfDir: $pdfDir" } require(pdfDir.exists() && pdfDir.isDirectory) { "Invalid pdfDir: $pdfDir" }
val file = File(pdfDir, "${System.currentTimeMillis()}.pdf") val file = File(pdfDir, "${System.currentTimeMillis()}.pdf")
val pageCount = FileOutputStream(file).use { val pageCount = FileOutputStream(file).use {
pdfWriter.writePdfFromJpegs(jpegs, it) pdfWriter.writePdfFromJpegs(pages, it)
} }
val sizeBytes = file.length() val sizeBytes = file.length()
return GeneratedPdf(file, sizeBytes, pageCount) return GeneratedPdf(file, sizeBytes, pageCount)

View File

@@ -33,6 +33,8 @@ import org.fairscan.app.domain.PageViewKey
import org.fairscan.app.domain.Rotation import org.fairscan.app.domain.Rotation
import org.fairscan.app.domain.ScanPage import org.fairscan.app.domain.ScanPage
import org.fairscan.imageprocessing.ColorMode import org.fairscan.imageprocessing.ColorMode
import org.fairscan.imageprocessing.ImageSize
import org.fairscan.imageprocessing.OpticalMeasures
import org.fairscan.imageprocessing.Point import org.fairscan.imageprocessing.Point
import org.fairscan.imageprocessing.Quad import org.fairscan.imageprocessing.Quad
import org.fairscan.imageprocessing.cameraIntrinsics import org.fairscan.imageprocessing.cameraIntrinsics
@@ -154,8 +156,11 @@ class ImageRepository(
manualRotationDegrees = Rotation.R0.degrees, manualRotationDegrees = Rotation.R0.degrees,
isColored = metadata.autoColorMode == ColorMode.COLOR, isColored = metadata.autoColorMode == ColorMode.COLOR,
colorMode = colorMode, colorMode = colorMode,
focalLength = metadata.cameraIntrinsics?.focalLength, focalLength = metadata.opticalMeasures?.cameraIntrinsics?.focalLength,
sensorWidth = metadata.cameraIntrinsics?.sensorWidth, sensorWidth = metadata.opticalMeasures?.cameraIntrinsics?.sensorWidth,
subjectDistance = metadata.opticalMeasures?.subjectDistance,
sourceWidth = metadata.sourceSize?.width?.toInt(),
sourceHeight = metadata.sourceSize?.height?.toInt(),
) )
) )
saveMetadata() saveMetadata()
@@ -402,10 +407,17 @@ fun NormalizedQuad.toQuad(): Quad =
fun PageV2.toMetadata(): PageMetadata? { fun PageV2.toMetadata(): PageMetadata? {
if (quad == null || isColored == null) return null if (quad == null || isColored == null) return null
val cameraIntrinsics = cameraIntrinsics(focalLength, sensorWidth)
val sourceSize =
if (sourceWidth != null && sourceHeight != null)
ImageSize(sourceWidth, sourceHeight)
else
null
return PageMetadata( return PageMetadata(
(userQuad ?: quad).toQuad(), (userQuad ?: quad).toQuad(),
Rotation.fromDegrees(baseRotationDegrees), Rotation.fromDegrees(baseRotationDegrees),
if (isColored) ColorMode.COLOR else ColorMode.GRAYSCALE, if (isColored) ColorMode.COLOR else ColorMode.GRAYSCALE,
cameraIntrinsics(focalLength, sensorWidth) sourceSize,
cameraIntrinsics?.let { OpticalMeasures(it, subjectDistance) },
) )
} }

View File

@@ -16,26 +16,55 @@ package org.fairscan.app.domain
import org.fairscan.app.data.ImageRepository import org.fairscan.app.data.ImageRepository
import org.fairscan.app.platform.processedImage import org.fairscan.app.platform.processedImage
import org.fairscan.imageprocessing.EstimatedDimensions
import org.fairscan.imageprocessing.estimateRealDimensions
import org.fairscan.imageprocessing.resizeForMaxPixels import org.fairscan.imageprocessing.resizeForMaxPixels
import org.fairscan.imageprocessing.scaledTo
import org.opencv.core.Mat import org.opencv.core.Mat
fun interface JpegProvider { fun interface JpegProvider {
suspend fun get(): Jpeg suspend fun get(): Jpeg
} }
suspend fun jpegsForExport( data class PageToExport(
val metadata: PageMetadata?,
val jpeg: JpegProvider,
) {
fun estimatedDimensions(): EstimatedDimensions? {
if (metadata == null)
return null
val size = metadata.sourceSize
if (size == null)
return null
val quad = metadata.normalizedQuad.scaledTo(1.0, 1.0, size.width, size.height)
val realDimensions = estimateRealDimensions(
quad, size.width.toInt(), size.height.toInt(), metadata.opticalMeasures
)
return realDimensions.applyRotation(metadata.baseRotation)
}
}
/**
 * Returns these dimensions as they apply once the page has been rotated by [rotation].
 *
 * A quarter turn ([Rotation.R90] or [Rotation.R270]) exchanges width and height; any
 * other rotation returns the receiver unchanged. The exchange is applied to both
 * [EstimatedDimensions.Physical] and [EstimatedDimensions.Ratio], so the `aspectRatio`
 * of the result always describes the rotated page.
 *
 * @param rotation rotation applied to the page
 * @return the dimensions after rotation, of the same variant as the receiver
 */
private fun EstimatedDimensions.applyRotation(rotation: Rotation): EstimatedDimensions {
    val isQuarterTurn = rotation == Rotation.R90 || rotation == Rotation.R270
    if (!isQuarterTurn) return this
    // Exhaustive on purpose (no `else`): a new variant must decide how it rotates.
    return when (this) {
        is EstimatedDimensions.Physical -> EstimatedDimensions.Physical(heightMm, widthMm)
        is EstimatedDimensions.Ratio -> EstimatedDimensions.Ratio(height, width)
    }
}
suspend fun pagesToExport(
imageRepository: ImageRepository, imageRepository: ImageRepository,
exportQuality: ExportQuality exportQuality: ExportQuality
): List<JpegProvider> { ): List<PageToExport> {
val pages = imageRepository.pages() val pages = imageRepository.pages()
return when (exportQuality) { return when (exportQuality) {
ExportQuality.BALANCED -> pages.map { ExportQuality.BALANCED -> pages.map {
JpegProvider { jpeg(it, imageRepository) } PageToExport(it.metadata) { jpeg(it, imageRepository) }
} }
ExportQuality.LOW -> pages.map { page -> ExportQuality.LOW -> pages.map { page ->
JpegProvider { PageToExport(page.metadata) {
resizeJpegBytesForMaxPixels( resizeJpegBytesForMaxPixels(
jpeg = jpeg(page, imageRepository), jpeg = jpeg(page, imageRepository),
maxPixels = exportQuality.maxPixels.toDouble(), maxPixels = exportQuality.maxPixels.toDouble(),
@@ -45,7 +74,7 @@ suspend fun jpegsForExport(
} }
ExportQuality.HIGH -> pages.map { page -> ExportQuality.HIGH -> pages.map { page ->
JpegProvider { PageToExport(page.metadata) {
val source = imageRepository.source(page.id) val source = imageRepository.source(page.id)
val metadata = page.metadata val metadata = page.metadata
val colorMode = page.colorMode val colorMode = page.colorMode

View File

@@ -14,15 +14,17 @@
*/ */
package org.fairscan.app.domain package org.fairscan.app.domain
import org.fairscan.imageprocessing.CameraIntrinsics
import org.fairscan.imageprocessing.ColorMode import org.fairscan.imageprocessing.ColorMode
import org.fairscan.imageprocessing.ImageSize
import org.fairscan.imageprocessing.OpticalMeasures
import org.fairscan.imageprocessing.Quad import org.fairscan.imageprocessing.Quad
data class PageMetadata( data class PageMetadata(
val normalizedQuad: Quad, val normalizedQuad: Quad,
val baseRotation: Rotation, val baseRotation: Rotation,
val autoColorMode: ColorMode, val autoColorMode: ColorMode,
val cameraIntrinsics: CameraIntrinsics?, val sourceSize: ImageSize?,
val opticalMeasures: OpticalMeasures?,
) )
data class ScanPage( data class ScanPage(

View File

@@ -22,33 +22,38 @@ import com.tom_roush.pdfbox.pdmodel.common.PDRectangle
import com.tom_roush.pdfbox.pdmodel.graphics.image.JPEGFactory import com.tom_roush.pdfbox.pdmodel.graphics.image.JPEGFactory
import org.fairscan.app.BuildConfig import org.fairscan.app.BuildConfig
import org.fairscan.app.data.PdfWriter import org.fairscan.app.data.PdfWriter
import org.fairscan.app.domain.JpegProvider import org.fairscan.app.domain.PageToExport
import org.fairscan.imageprocessing.EstimatedDimensions
import java.io.OutputStream import java.io.OutputStream
import java.util.Calendar import java.util.Calendar
class AndroidPdfWriter : PdfWriter { class AndroidPdfWriter : PdfWriter {
override suspend fun writePdfFromJpegs(jpegs: List<JpegProvider>, outputStream: OutputStream): Int { override suspend fun writePdfFromJpegs(pages: List<PageToExport>, outputStream: OutputStream): Int {
val doc = PDDocument() val doc = PDDocument()
doc.documentInformation.creationDate = Calendar.getInstance() doc.documentInformation.creationDate = Calendar.getInstance()
doc.documentInformation.creator = "FairScan ${BuildConfig.VERSION_NAME}" doc.documentInformation.creator = "FairScan ${BuildConfig.VERSION_NAME}"
doc.use { document -> doc.use { document ->
for (jpegBytes in jpegs) { for (page in pages) {
val image = JPEGFactory.createFromByteArray(document, jpegBytes.get().bytes) val image = JPEGFactory.createFromByteArray(document, page.jpeg.get().bytes)
// Let's say that the physical dimensions of the page are close to US Letter
// US Letter: 215.9×279.4 mm (A4: 210×297 mm)
val maxDimInMm = 279.4f
// PDF has 72 points (units) per inch, 1 inch = 25.4 mm // PDF has 72 points (units) per inch, 1 inch = 25.4 mm
val pointsPerMm = 72f / 25.4f val pointsPerMm = 72f / 25.4f
val widthPx = image.width.toFloat() val widthPx = image.width.toFloat()
val heightPx = image.height.toFloat() val heightPx = image.height.toFloat()
val maxPx = maxOf(widthPx, heightPx) val dimensions = page.estimatedDimensions()
val scalePxToMm = maxDimInMm / maxPx val (widthPoints, heightPoints) = when (dimensions) {
is EstimatedDimensions.Physical -> {
val widthPoints = widthPx * scalePxToMm * pointsPerMm dimensions.widthMm.toFloat() * pointsPerMm to dimensions.heightMm.toFloat() * pointsPerMm
val heightPoints = heightPx * scalePxToMm * pointsPerMm }
else -> {
// No physical dimensions available: approximate using US Letter max dimension
val maxDimInMm = 279.4f
val scalePxToMm = maxDimInMm / maxOf(widthPx, heightPx)
widthPx * scalePxToMm * pointsPerMm to heightPx * scalePxToMm * pointsPerMm
}
}
val page = PDPage(PDRectangle(widthPoints, heightPoints)) val page = PDPage(PDRectangle(widthPoints, heightPoints))
document.addPage(page) document.addPage(page)

View File

@@ -25,9 +25,10 @@ import org.fairscan.app.domain.Jpeg
import org.fairscan.app.domain.PageMetadata import org.fairscan.app.domain.PageMetadata
import org.fairscan.app.domain.Rotation import org.fairscan.app.domain.Rotation
import org.fairscan.app.ui.screens.settings.DefaultColorMode import org.fairscan.app.ui.screens.settings.DefaultColorMode
import org.fairscan.imageprocessing.CameraIntrinsics
import org.fairscan.imageprocessing.ColorMode import org.fairscan.imageprocessing.ColorMode
import org.fairscan.imageprocessing.ImageSize
import org.fairscan.imageprocessing.Mask import org.fairscan.imageprocessing.Mask
import org.fairscan.imageprocessing.OpticalMeasures
import org.fairscan.imageprocessing.Point import org.fairscan.imageprocessing.Point
import org.fairscan.imageprocessing.Quad import org.fairscan.imageprocessing.Quad
import org.fairscan.imageprocessing.autoColorMode import org.fairscan.imageprocessing.autoColorMode
@@ -102,7 +103,7 @@ fun processedImage(
sourceMat = source.toMat() sourceMat = source.toMat()
val quad = metadata.normalizedQuad.scaledTo(1, 1, sourceMat.width(), sourceMat.height()) val quad = metadata.normalizedQuad.scaledTo(1, 1, sourceMat.width(), sourceMat.height())
page = extractDocument(sourceMat, quad, rotationDegrees, colorMode, exportQuality.maxPixels, page = extractDocument(sourceMat, quad, rotationDegrees, colorMode, exportQuality.maxPixels,
metadata.cameraIntrinsics) metadata.opticalMeasures)
return Jpeg.fromMat(page, exportQuality.jpegQuality) return Jpeg.fromMat(page, exportQuality.jpegQuality)
} finally { } finally {
sourceMat?.release() sourceMat?.release()
@@ -117,7 +118,7 @@ fun extractDocumentFromBitmap(
mask: Mask?, mask: Mask?,
viewModelScope: CoroutineScope, viewModelScope: CoroutineScope,
defaultColorMode: DefaultColorMode = DefaultColorMode.AUTO, defaultColorMode: DefaultColorMode = DefaultColorMode.AUTO,
cameraIntrinsics: CameraIntrinsics?, opticalMeasures: OpticalMeasures?,
): CapturedPage { ): CapturedPage {
val exportQuality = ExportQuality.BALANCED val exportQuality = ExportQuality.BALANCED
var colorMode = ColorMode.COLOR var colorMode = ColorMode.COLOR
@@ -144,7 +145,7 @@ fun extractDocumentFromBitmap(
autoColorMode = autoColorMode(bgr, mask, quad) autoColorMode = autoColorMode(bgr, mask, quad)
colorMode = defaultColorMode.colorMode ?: autoColorMode colorMode = defaultColorMode.colorMode ?: autoColorMode
page = extractDocument(bgr, quad, rotationDegrees, colorMode, exportQuality.maxPixels, page = extractDocument(bgr, quad, rotationDegrees, colorMode, exportQuality.maxPixels,
cameraIntrinsics) opticalMeasures)
} }
val pageJpeg = Jpeg.fromMat(page, exportQuality.jpegQuality) val pageJpeg = Jpeg.fromMat(page, exportQuality.jpegQuality)
@@ -152,7 +153,9 @@ fun extractDocumentFromBitmap(
page.release() page.release()
val baseRotation = Rotation.fromDegrees(rotationDegrees) val baseRotation = Rotation.fromDegrees(rotationDegrees)
val metadata = PageMetadata(normalizedQuad, baseRotation, autoColorMode, cameraIntrinsics) val sourceSize = ImageSize(source.width, source.height)
val metadata =
PageMetadata(normalizedQuad, baseRotation, autoColorMode, sourceSize, opticalMeasures)
val sourceJpegDeferred = viewModelScope.async(Dispatchers.IO) { val sourceJpegDeferred = viewModelScope.async(Dispatchers.IO) {
compressSource(source) compressSource(source)
} }

View File

@@ -15,12 +15,18 @@
package org.fairscan.app.ui.screens.camera package org.fairscan.app.ui.screens.camera
import android.graphics.Bitmap import android.graphics.Bitmap
import android.hardware.camera2.CameraCaptureSession
import android.hardware.camera2.CameraMetadata
import android.hardware.camera2.CaptureRequest
import android.hardware.camera2.CaptureResult
import android.hardware.camera2.TotalCaptureResult
import android.util.Log import android.util.Log
import android.util.Size import android.util.Size
import android.view.ViewGroup.LayoutParams.MATCH_PARENT import android.view.ViewGroup.LayoutParams.MATCH_PARENT
import android.widget.LinearLayout import android.widget.LinearLayout
import androidx.annotation.OptIn import androidx.annotation.OptIn
import androidx.camera.camera2.interop.Camera2CameraInfo import androidx.camera.camera2.interop.Camera2CameraInfo
import androidx.camera.camera2.interop.Camera2Interop
import androidx.camera.camera2.interop.ExperimentalCamera2Interop import androidx.camera.camera2.interop.ExperimentalCamera2Interop
import androidx.camera.core.CameraControl import androidx.camera.core.CameraControl
import androidx.camera.core.CameraSelector import androidx.camera.core.CameraSelector
@@ -69,6 +75,7 @@ import androidx.lifecycle.LifecycleOwner
import androidx.lifecycle.compose.LocalLifecycleOwner import androidx.lifecycle.compose.LocalLifecycleOwner
import org.fairscan.app.ui.components.CameraPermissionState import org.fairscan.app.ui.components.CameraPermissionState
import org.fairscan.imageprocessing.CameraIntrinsics import org.fairscan.imageprocessing.CameraIntrinsics
import org.fairscan.imageprocessing.OpticalMeasures
import org.fairscan.imageprocessing.Point import org.fairscan.imageprocessing.Point
import org.fairscan.imageprocessing.Quad import org.fairscan.imageprocessing.Quad
import org.fairscan.imageprocessing.cameraIntrinsics import org.fairscan.imageprocessing.cameraIntrinsics
@@ -193,7 +200,7 @@ fun bindCameraUseCases(
.setOutputImageFormat(ImageAnalysis.OUTPUT_IMAGE_FORMAT_RGBA_8888).build() .setOutputImageFormat(ImageAnalysis.OUTPUT_IMAGE_FORMAT_RGBA_8888).build()
imageAnalysis.setAnalyzer(executor, onImageAnalyzed) imageAnalysis.setAnalyzer(executor, onImageAnalyzed)
val imageCapture = ImageCapture.Builder() val imageCaptureBuilder = ImageCapture.Builder()
.setResolutionSelector( .setResolutionSelector(
ResolutionSelector.Builder() ResolutionSelector.Builder()
.setResolutionStrategy( .setResolutionStrategy(
@@ -208,7 +215,21 @@ fun bindCameraUseCases(
.build() .build()
) )
.setCaptureMode(ImageCapture.CAPTURE_MODE_MINIMIZE_LATENCY) .setCaptureMode(ImageCapture.CAPTURE_MODE_MINIMIZE_LATENCY)
.build()
Camera2Interop.Extender(imageCaptureBuilder)
.setSessionCaptureCallback(object : CameraCaptureSession.CaptureCallback() {
override fun onCaptureCompleted(
session: CameraCaptureSession,
request: CaptureRequest,
result: TotalCaptureResult
) {
result.get(CaptureResult.LENS_FOCUS_DISTANCE)?.let {
captureController.lastFocusDistanceDiopters = it
}
}
})
val imageCapture = imageCaptureBuilder.build()
captureController.imageCapture = imageCapture captureController.imageCapture = imageCapture
val camera = cameraProvider.bindToLifecycle(lifecycleOwner, cameraSelector, val camera = cameraProvider.bindToLifecycle(lifecycleOwner, cameraSelector,
@@ -296,21 +317,34 @@ class CameraCaptureController {
private val executor = Executors.newSingleThreadExecutor() private val executor = Executors.newSingleThreadExecutor()
var previewView: PreviewView? = null var previewView: PreviewView? = null
var cameraIntrinsics: CameraIntrinsics? = null var cameraIntrinsics: CameraIntrinsics? = null
var canUseFocusDistance = false
@Volatile
var lastFocusDistanceDiopters: Float? = null
fun shutdown() { fun shutdown() {
executor.shutdown() executor.shutdown()
} }
fun takePicture(onImageCaptured: (ImageProxy?, CameraIntrinsics?) -> Unit) { fun takePicture(onImageCaptured: (ImageProxy?, OpticalMeasures?) -> Unit) {
imageCapture?.takePicture( imageCapture?.takePicture(
executor, executor,
object : ImageCapture.OnImageCapturedCallback() { object : ImageCapture.OnImageCapturedCallback() {
override fun onCaptureSuccess(imageProxy: ImageProxy) { override fun onCaptureSuccess(imageProxy: ImageProxy) {
onImageCaptured(imageProxy, cameraIntrinsics) val diopters = lastFocusDistanceDiopters
val subjectDistanceInMm =
if (canUseFocusDistance && diopters != null && diopters != 0.0f) {
1000 / diopters
} else {
null
}
onImageCaptured(
imageProxy,
cameraIntrinsics?.let { OpticalMeasures(it, subjectDistanceInMm) })
} }
override fun onError(exception: ImageCaptureException) { override fun onError(exception: ImageCaptureException) {
Log.e("CameraCapture", "Image capture failed: ${exception.message}", exception) Log.e("CameraCapture", "Image capture failed: ${exception.message}", exception)
onImageCaptured(null, cameraIntrinsics) onImageCaptured(null, null)
} }
} }
) )
@@ -344,6 +378,12 @@ class CameraCaptureController {
} else { } else {
cameraIntrinsics(focalLengths[0], max(sensorSize.width, sensorSize.height)) cameraIntrinsics(focalLengths[0], max(sensorSize.width, sensorSize.height))
} }
val calibration = cameraInfo.getCameraCharacteristic(
android.hardware.camera2.CameraCharacteristics.LENS_INFO_FOCUS_DISTANCE_CALIBRATION
)
canUseFocusDistance =
calibration == CameraMetadata.LENS_INFO_FOCUS_DISTANCE_CALIBRATION_CALIBRATED
|| calibration == CameraMetadata.LENS_INFO_FOCUS_DISTANCE_CALIBRATION_APPROXIMATE
} }
} }

View File

@@ -229,8 +229,8 @@ fun CameraScreen(
Log.i("FairScan", "Pressed <Capture>") Log.i("FairScan", "Pressed <Capture>")
cameraViewModel.onCapturePressed(it) cameraViewModel.onCapturePressed(it)
captureController.takePicture( captureController.takePicture(
onImageCaptured = { imageProxy, cameraCharacteristics -> onImageCaptured = { imageProxy, opticalMeasures ->
cameraViewModel.onImageCaptured(imageProxy, cameraCharacteristics) } cameraViewModel.onImageCaptured(imageProxy, opticalMeasures) }
) )
} }
}, },
@@ -648,7 +648,7 @@ fun CameraScreenPreviewWithProcessedImage() {
CapturedPage( CapturedPage(
debugImage("gallica.bnf.fr-bpt6k5530456s-1.jpg"), debugImage("gallica.bnf.fr-bpt6k5530456s-1.jpg"),
CompletableDeferred(Jpeg(ByteArray(0))), CompletableDeferred(Jpeg(ByteArray(0))),
PageMetadata(quad, R0, ColorMode.COLOR, null), PageMetadata(quad, R0, ColorMode.COLOR, null, null),
ColorMode.COLOR))) ColorMode.COLOR)))
} }

View File

@@ -36,6 +36,7 @@ import org.fairscan.app.domain.CapturedPage
import org.fairscan.app.platform.extractDocumentFromBitmap import org.fairscan.app.platform.extractDocumentFromBitmap
import org.fairscan.imageprocessing.CameraIntrinsics import org.fairscan.imageprocessing.CameraIntrinsics
import org.fairscan.imageprocessing.ImageSize import org.fairscan.imageprocessing.ImageSize
import org.fairscan.imageprocessing.OpticalMeasures
import org.fairscan.imageprocessing.detectDocumentQuad import org.fairscan.imageprocessing.detectDocumentQuad
import java.util.concurrent.CancellationException import java.util.concurrent.CancellationException
@@ -134,13 +135,13 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
} }
} }
fun onImageCaptured(imageProxy: ImageProxy?, cameraIntrinsics: CameraIntrinsics?) { fun onImageCaptured(imageProxy: ImageProxy?, opticalMeasures: OpticalMeasures?) {
if (imageProxy != null) { if (imageProxy != null) {
viewModelScope.launch { viewModelScope.launch {
try { try {
val source = imageProxy.toBitmap() val source = imageProxy.toBitmap()
val rotationDegrees = imageProxy.imageInfo.rotationDegrees val rotationDegrees = imageProxy.imageInfo.rotationDegrees
val page = processCapturedImage(source, rotationDegrees, cameraIntrinsics) val page = processCapturedImage(source, rotationDegrees, opticalMeasures)
imageProxy.close() imageProxy.close()
onCaptureProcessed(page) onCaptureProcessed(page)
} catch (e: RuntimeException) { } catch (e: RuntimeException) {
@@ -156,7 +157,7 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
private suspend fun processCapturedImage( private suspend fun processCapturedImage(
source: Bitmap, source: Bitmap,
rotationDegrees: Int, rotationDegrees: Int,
cameraIntrinsics: CameraIntrinsics?, opticalMeasures: OpticalMeasures?,
): CapturedPage = withContext(Dispatchers.IO) { ): CapturedPage = withContext(Dispatchers.IO) {
val segmentation = imageSegmentationService.runSegmentationAndReturn(source) val segmentation = imageSegmentationService.runSegmentationAndReturn(source)
val mask = segmentation?.segmentation val mask = segmentation?.segmentation
@@ -164,7 +165,7 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
val quad = mask?.let { detectDocumentQuad(mask, originalSize, isLiveAnalysis = false) } val quad = mask?.let { detectDocumentQuad(mask, originalSize, isLiveAnalysis = false) }
val defaultColorMode = settingsRepository.defaultColorMode.first() val defaultColorMode = settingsRepository.defaultColorMode.first()
val result = extractDocumentFromBitmap( val result = extractDocumentFromBitmap(
source, quad, rotationDegrees, mask, viewModelScope, defaultColorMode, cameraIntrinsics) source, quad, rotationDegrees, mask, viewModelScope, defaultColorMode, opticalMeasures)
return@withContext result return@withContext result
} }

View File

@@ -46,7 +46,7 @@ import org.fairscan.app.data.FileManager
import org.fairscan.app.data.ImageRepository import org.fairscan.app.data.ImageRepository
import org.fairscan.app.domain.ExportQuality import org.fairscan.app.domain.ExportQuality
import org.fairscan.app.domain.PageViewKey import org.fairscan.app.domain.PageViewKey
import org.fairscan.app.domain.jpegsForExport import org.fairscan.app.domain.pagesToExport
import org.fairscan.app.ui.screens.settings.ExportFormat import org.fairscan.app.ui.screens.settings.ExportFormat
import java.io.File import java.io.File
import java.io.FileInputStream import java.io.FileInputStream
@@ -76,8 +76,8 @@ class ExportViewModel(container: AppContainer, val imageRepository: ImageReposit
private suspend fun generatePdf( private suspend fun generatePdf(
exportQuality: ExportQuality exportQuality: ExportQuality
): ExportResult.Pdf = withContext(Dispatchers.IO) { ): ExportResult.Pdf = withContext(Dispatchers.IO) {
val jpegs = jpegsForExport(imageRepository, exportQuality) val pageToExports = pagesToExport(imageRepository, exportQuality)
val pdf = fileManager.generatePdf(jpegs) val pdf = fileManager.generatePdf(pageToExports)
return@withContext ExportResult.Pdf(pdf.file, pdf.sizeInBytes, pdf.pageCount) return@withContext ExportResult.Pdf(pdf.file, pdf.sizeInBytes, pdf.pageCount)
} }
@@ -181,12 +181,12 @@ class ExportViewModel(container: AppContainer, val imageRepository: ImageReposit
private suspend fun generateJpegs( private suspend fun generateJpegs(
exportQuality: ExportQuality exportQuality: ExportQuality
): ExportResult.Jpeg = withContext(Dispatchers.IO) { ): ExportResult.Jpeg = withContext(Dispatchers.IO) {
val jpegs = jpegsForExport(imageRepository, exportQuality) val pageToExports = pagesToExport(imageRepository, exportQuality)
val timestamp = System.currentTimeMillis() val timestamp = System.currentTimeMillis()
preparationDir.mkdirs() preparationDir.mkdirs()
val files = jpegs.mapIndexed { index, jpeg -> val files = pageToExports.mapIndexed { index, page ->
val file = File(preparationDir, "$timestamp-${index + 1}.jpg") val file = File(preparationDir, "$timestamp-${index + 1}.jpg")
file.writeBytes(jpeg.get().bytes) file.writeBytes(page.jpeg.get().bytes)
file file
}.toList() }.toList()
val sizeInBytes = files.sumOf { it.length() } val sizeInBytes = files.sumOf { it.length() }

View File

@@ -17,7 +17,7 @@ package org.fairscan.app.data
import kotlinx.coroutines.test.runTest import kotlinx.coroutines.test.runTest
import org.assertj.core.api.Assertions.assertThat import org.assertj.core.api.Assertions.assertThat
import org.fairscan.app.domain.Jpeg import org.fairscan.app.domain.Jpeg
import org.fairscan.app.domain.JpegProvider import org.fairscan.app.domain.PageToExport
import org.junit.Test import org.junit.Test
import java.io.File import java.io.File
import java.io.OutputStream import java.io.OutputStream
@@ -73,15 +73,16 @@ class FileManagerTest {
@Test @Test
fun generatePdf() = runTest { fun generatePdf() = runTest {
val fakePdfWriter = object : PdfWriter { val fakePdfWriter = object : PdfWriter {
override suspend fun writePdfFromJpegs(jpegs: List<JpegProvider>, outputStream: OutputStream): Int { override suspend fun writePdfFromJpegs(pages: List<PageToExport>, outputStream: OutputStream): Int {
val list = jpegs.toList() val list = pages.toList()
list.forEach { bytes -> outputStream.write(bytes.get().bytes) } list.forEach { page -> outputStream.write(page.jpeg.get().bytes) }
return list.size return list.size
} }
} }
val manager = FileManager(pdfDir, externalDir, fakePdfWriter) val manager = FileManager(pdfDir, externalDir, fakePdfWriter)
val jpegs = listOf(byteArrayOf(0x01, 0x02), byteArrayOf(0x11)).map { JpegProvider { Jpeg(it) } } val pages = listOf(byteArrayOf(0x01, 0x02), byteArrayOf(0x11))
val pdf = manager.generatePdf(jpegs) .map { PageToExport(null) { Jpeg(it) } }
val pdf = manager.generatePdf(pages)
assertThat(pdf.pageCount).isEqualTo(2) assertThat(pdf.pageCount).isEqualTo(2)
assertThat(pdf.sizeInBytes).isEqualTo(3) assertThat(pdf.sizeInBytes).isEqualTo(3)
assertThat(pdf.file.readBytes()).isEqualTo(byteArrayOf(0x01, 0x02, 0x11)) assertThat(pdf.file.readBytes()).isEqualTo(byteArrayOf(0x01, 0x02, 0x11))

View File

@@ -26,7 +26,6 @@ import org.assertj.core.api.Assertions.assertThat
import org.fairscan.app.domain.Jpeg import org.fairscan.app.domain.Jpeg
import org.fairscan.app.domain.PageMetadata import org.fairscan.app.domain.PageMetadata
import org.fairscan.app.domain.PageViewKey import org.fairscan.app.domain.PageViewKey
import org.fairscan.app.domain.Rotation
import org.fairscan.app.domain.Rotation.R0 import org.fairscan.app.domain.Rotation.R0
import org.fairscan.app.domain.Rotation.R180 import org.fairscan.app.domain.Rotation.R180
import org.fairscan.app.domain.Rotation.R270 import org.fairscan.app.domain.Rotation.R270
@@ -35,6 +34,8 @@ import org.fairscan.imageprocessing.CameraIntrinsics
import org.fairscan.imageprocessing.ColorMode import org.fairscan.imageprocessing.ColorMode
import org.fairscan.imageprocessing.ColorMode.COLOR import org.fairscan.imageprocessing.ColorMode.COLOR
import org.fairscan.imageprocessing.ColorMode.GRAYSCALE import org.fairscan.imageprocessing.ColorMode.GRAYSCALE
import org.fairscan.imageprocessing.ImageSize
import org.fairscan.imageprocessing.OpticalMeasures
import org.fairscan.imageprocessing.Point import org.fairscan.imageprocessing.Point
import org.fairscan.imageprocessing.Quad import org.fairscan.imageprocessing.Quad
import org.junit.Rule import org.junit.Rule
@@ -52,8 +53,9 @@ class ImageRepositoryTest {
private val testScope = TestScope() private val testScope = TestScope()
val quad1 = Quad(Point(.01, .02), Point(.1, .03), Point(.11, .12), Point(.03, .09)) val quad1 = Quad(Point(.01, .02), Point(.1, .03), Point(.11, .12), Point(.03, .09))
val intrinsics = CameraIntrinsics(42.0f, 43.0f) val opticalMeasures = OpticalMeasures(CameraIntrinsics(42.0f, 43.0f), 44.0f)
val metadata1 = PageMetadata(quad1, R90, COLOR, intrinsics) val sourceSize = ImageSize(1600, 1200)
val metadata1 = PageMetadata(quad1, R90, COLOR, sourceSize, opticalMeasures)
fun getFilesDir(): File { fun getFilesDir(): File {
if (_filesDir == null) { if (_filesDir == null) {

View File

@@ -9,7 +9,7 @@ espressoCore = "3.7.0"
lifecycleRuntimeKtx = "2.10.0" lifecycleRuntimeKtx = "2.10.0"
activityCompose = "1.13.0" activityCompose = "1.13.0"
composeBom = "2026.03.00" composeBom = "2026.03.00"
camerax = "1.5.3" camerax = "1.6.1"
datastore = "1.2.1" datastore = "1.2.1"
documentfile = "1.1.0" documentfile = "1.1.0"
litert = "1.4.1" litert = "1.4.1"

View File

@@ -25,6 +25,7 @@ import org.opencv.core.MatOfPoint2f
import org.opencv.core.Size import org.opencv.core.Size
import org.opencv.imgproc.Imgproc import org.opencv.imgproc.Imgproc
import kotlin.math.abs import kotlin.math.abs
import kotlin.math.sqrt
interface Mask { interface Mask {
val width: Int val width: Int
@@ -156,14 +157,15 @@ fun extractDocument(
rotationDegrees: Int, rotationDegrees: Int,
colorMode: ColorMode, colorMode: ColorMode,
maxPixels: Long, maxPixels: Long,
cameraIntrinsics: CameraIntrinsics? = null, opticalMeasures: OpticalMeasures? = null,
): Mat { ): Mat {
val (targetWidth, targetHeight) = estimateRealDimensions( val estimatedDimensions = estimateRealDimensions(
quad, quad,
inputMat.cols(), inputMat.cols(),
inputMat.rows(), inputMat.rows(),
cameraIntrinsics opticalMeasures,
) )
val (targetWidth, targetHeight) = estimatedDimensions.toPixelDimensions(quad)
val srcPoints = MatOfPoint2f( val srcPoints = MatOfPoint2f(
quad.topLeft.toCv(), quad.topLeft.toCv(),
quad.topRight.toCv(), quad.topRight.toCv(),
@@ -193,6 +195,17 @@ fun extractDocument(
return rotated return rotated
} }
/**
 * Converts these estimated dimensions into a target size for the extracted document.
 *
 * The result keeps this estimate's `aspectRatio` (height / width) while its area equals
 * the product of the quad's two averaged pairs of opposite sides. The returned size is
 * expressed in the same units as the coordinates of [quad].
 *
 * @param quad outline of the document in the input image
 * @return the target (width, height)
 */
fun EstimatedDimensions.toPixelDimensions(quad: Quad): Pair<Double, Double> {
    val meanHorizontalSide =
        (norm(quad.topLeft, quad.topRight) + norm(quad.bottomLeft, quad.bottomRight)) / 2
    val meanVerticalSide =
        (norm(quad.topLeft, quad.bottomLeft) + norm(quad.topRight, quad.bottomRight)) / 2
    val heightOverWidth = aspectRatio
    // width * height == area and height == width * ratio  =>  width == sqrt(area / ratio)
    val widthPx = sqrt(meanHorizontalSide * meanVerticalSide / heightOverWidth)
    return widthPx to widthPx * heightOverWidth
}
fun rotate(input: Mat, degrees: Int): Mat { fun rotate(input: Mat, degrees: Int): Mat {
val output = Mat() val output = Mat()
when ((degrees % 360 + 360) % 360) { when ((degrees % 360 + 360) % 360) {

View File

@@ -47,6 +47,24 @@ fun cameraIntrinsics(focalLengthInMm: Float?, sensorWidthInMm: Float?): CameraIn
return CameraIntrinsics(focalLengthInMm, sensorWidthInMm) return CameraIntrinsics(focalLengthInMm, sensorWidthInMm)
} }
/**
 * Optical data available for a capture: the camera intrinsics plus an optional subject distance.
 *
 * @property cameraIntrinsics focal length and sensor width of the camera
 * @property subjectDistance distance to the subject in millimeters, or `null` when unavailable
 */
data class OpticalMeasures(
    val cameraIntrinsics: CameraIntrinsics,
    // in millimeters
    val subjectDistance: Float?,
)
/**
 * Result of a document size estimation.
 *
 * Either real-world dimensions ([Physical]) or dimensions known only up to an arbitrary
 * scale ([Ratio]). Both variants expose a height-over-width [aspectRatio].
 */
sealed class EstimatedDimensions {
    /** Height divided by width. */
    abstract val aspectRatio: Double

    /** Dimensions in mm, when subject distance is available. */
    data class Physical(val widthMm: Double, val heightMm: Double) : EstimatedDimensions() {
        override val aspectRatio: Double
            get() = heightMm / widthMm
    }

    /** Dimensions in arbitrary units: only the ratio between them is meaningful. */
    data class Ratio(val width: Double, val height: Double) : EstimatedDimensions() {
        override val aspectRatio: Double
            get() = height / width
    }
}
/** /**
* Estimates the true width and height of the document in the output image, * Estimates the true width and height of the document in the output image,
* correcting for perspective distortion using projective geometry. * correcting for perspective distortion using projective geometry.
@@ -63,13 +81,13 @@ fun estimateRealDimensions(
quad: Quad, quad: Quad,
imageWidth: Int, imageWidth: Int,
imageHeight: Int, imageHeight: Int,
cameraIntrinsics: CameraIntrinsics? opticalMeasures: OpticalMeasures?,
): Pair<Double, Double> { ): EstimatedDimensions {
fun averageSides(): Pair<Double, Double> { fun averageSides(): EstimatedDimensions.Ratio {
val w = (norm(quad.topLeft, quad.topRight) + norm(quad.bottomLeft, quad.bottomRight)) / 2 val w = (norm(quad.topLeft, quad.topRight) + norm(quad.bottomLeft, quad.bottomRight)) / 2
val h = (norm(quad.topLeft, quad.bottomLeft) + norm(quad.topRight, quad.bottomRight)) / 2 val h = (norm(quad.topLeft, quad.bottomLeft) + norm(quad.topRight, quad.bottomRight)) / 2
return Pair(w, h) return EstimatedDimensions.Ratio(w, h)
} }
// Homogeneous 2D point // Homogeneous 2D point
@@ -97,8 +115,9 @@ fun estimateRealDimensions(
val v1 = Point(v1h.x / v1h.z - cx, v1h.y / v1h.z - cy) val v1 = Point(v1h.x / v1h.z - cx, v1h.y / v1h.z - cy)
val v2 = Point(v2h.x / v2h.z - cx, v2h.y / v2h.z - cy) val v2 = Point(v2h.x / v2h.z - cx, v2h.y / v2h.z - cy)
val f = if (cameraIntrinsics != null) { val f = if (opticalMeasures != null) {
cameraIntrinsics.focalLengthInPixels(max(imageWidth, imageHeight)).toDouble() opticalMeasures.cameraIntrinsics
.focalLengthInPixels(max(imageWidth, imageHeight)).toDouble()
} else { } else {
// Focal length estimated assuming zero skew and principal point at image center. // Focal length estimated assuming zero skew and principal point at image center.
// Under these assumptions, the Image of the Absolute Conic (IAC) simplifies, // Under these assumptions, the Image of the Absolute Conic (IAC) simplifies,
@@ -131,26 +150,37 @@ fun estimateRealDimensions(
// Camera ray through a corner: K⁻¹ · (u, v, 1) // Camera ray through a corner: K⁻¹ · (u, v, 1)
fun ray(p: Point) = Vector3D((p.x - cx) / f, (p.y - cy) / f, 1.0) fun ray(p: Point) = Vector3D((p.x - cx) / f, (p.y - cy) / f, 1.0)
// Intersect ray with document plane: X = t·r where t = 1 / (n·r) // Scale factor: either from subject distance, or arbitrary (ratio only)
// We assume an arbitrary plane distance (d = 1). Absolute scale is wrong, val subjectDistance = opticalMeasures?.subjectDistance?.toDouble()
// but cancels out when computing length ratios. val scale: Double? = if (subjectDistance != null) {
// Project subject distance onto the plane normal to get perpendicular distance
val centerX = (quad.topLeft.x + quad.topRight.x + quad.bottomLeft.x + quad.bottomRight.x) / 4.0
val centerY = (quad.topLeft.y + quad.topRight.y + quad.bottomLeft.y + quad.bottomRight.y) / 4.0
val centerRay = ray(Point(centerX, centerY)).let { it * (1.0 / it.norm()) }
val cosAngle = centerRay.dotProduct(n).absoluteValue
if (cosAngle < 0.1) null // document too tilted, unreliable
else subjectDistance * cosAngle
} else null
// Intersect ray with document plane: X = t·r where t = d / (n·r)
// When subjectDistance is unavailable, we assume an arbitrary plane distance (d = 1): absolute
// scale is wrong, but cancels out when computing length ratios.
fun corner3D(p: Point): Vector3D { fun corner3D(p: Point): Vector3D {
val r = ray(p) val r = ray(p)
return r * (1.0 / n.dotProduct(r)) val t = if (scale != null) scale / n.dotProduct(r) else 1.0 / n.dotProduct(r)
return r * t
} }
val xTL = corner3D(quad.topLeft); val xTR = corner3D(quad.topRight) val xTL = corner3D(quad.topLeft); val xTR = corner3D(quad.topRight)
val xBR = corner3D(quad.bottomRight); val xBL = corner3D(quad.bottomLeft) val xBR = corner3D(quad.bottomRight); val xBL = corner3D(quad.bottomLeft)
// Side lengths in reconstructed 3D space (up to an unknown global scale) // Side lengths in reconstructed 3D space
val realW = ((xTR - xTL).norm() + (xBR - xBL).norm()) / 2 val realW = ((xTR - xTL).norm() + (xBR - xBL).norm()) / 2
val realH = ((xBL - xTL).norm() + (xBR - xTR).norm()) / 2 val realH = ((xBL - xTL).norm() + (xBR - xTR).norm()) / 2
// Output dimensions: preserve projected area, apply corrected aspect ratio return if (opticalMeasures != null && scale != null) {
val ratio = realH / realW EstimatedDimensions.Physical(realW, realH)
val (projW, projH) = averageSides() } else {
val targetWidth = sqrt(projW * projH / ratio) EstimatedDimensions.Ratio(realW, realH)
val targetHeight = targetWidth * ratio }
return Pair(targetWidth, targetHeight)
} }