Use focus distance to estimate physical size
This commit is contained in:
@@ -15,7 +15,6 @@
|
||||
package org.fairscan.app.data
|
||||
|
||||
import kotlinx.serialization.Serializable
|
||||
import org.fairscan.imageprocessing.CameraIntrinsics
|
||||
import org.fairscan.imageprocessing.ColorMode
|
||||
|
||||
@Serializable
|
||||
@@ -47,6 +46,9 @@ data class PageV2(
|
||||
val colorMode: ColorMode? = null,
|
||||
val focalLength: Float? = null,
|
||||
val sensorWidth: Float? = null,
|
||||
val subjectDistance: Float? = null,
|
||||
val sourceWidth: Int? = null,
|
||||
val sourceHeight: Int? = null,
|
||||
)
|
||||
|
||||
@Serializable
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
*/
|
||||
package org.fairscan.app.data
|
||||
|
||||
import org.fairscan.app.domain.JpegProvider
|
||||
import org.fairscan.app.domain.PageToExport
|
||||
import java.io.File
|
||||
import java.io.FileOutputStream
|
||||
import java.io.OutputStream
|
||||
@@ -26,7 +26,7 @@ data class GeneratedPdf(
|
||||
)
|
||||
|
||||
/**
 * Writes a list of exported pages into a PDF document.
 *
 * This is a `fun interface`, so it must declare exactly one abstract method. The former
 * `List<JpegProvider>` overload is gone: it would clash with this one (same erased JVM
 * signature), and the visible callers pass [PageToExport] values.
 */
fun interface PdfWriter {
    /**
     * Writes [pages] as a PDF to [outputStream].
     *
     * @param pages the pages to export, each carrying its metadata and a JPEG provider
     * @param outputStream destination of the PDF bytes
     * @return an integer that callers store as the page count of the generated PDF
     */
    suspend fun writePdfFromJpegs(pages: List<PageToExport>, outputStream: OutputStream): Int
}
|
||||
|
||||
class FileManager(
|
||||
@@ -43,12 +43,12 @@ class FileManager(
|
||||
}
|
||||
}
|
||||
|
||||
suspend fun generatePdf(jpegs: List<JpegProvider>): GeneratedPdf {
|
||||
suspend fun generatePdf(pages: List<PageToExport>): GeneratedPdf {
|
||||
pdfDir.mkdirs()
|
||||
require(pdfDir.exists() && pdfDir.isDirectory) { "Invalid pdfDir: $pdfDir" }
|
||||
val file = File(pdfDir, "${System.currentTimeMillis()}.pdf")
|
||||
val pageCount = FileOutputStream(file).use {
|
||||
pdfWriter.writePdfFromJpegs(jpegs, it)
|
||||
pdfWriter.writePdfFromJpegs(pages, it)
|
||||
}
|
||||
val sizeBytes = file.length()
|
||||
return GeneratedPdf(file, sizeBytes, pageCount)
|
||||
|
||||
@@ -33,6 +33,8 @@ import org.fairscan.app.domain.PageViewKey
|
||||
import org.fairscan.app.domain.Rotation
|
||||
import org.fairscan.app.domain.ScanPage
|
||||
import org.fairscan.imageprocessing.ColorMode
|
||||
import org.fairscan.imageprocessing.ImageSize
|
||||
import org.fairscan.imageprocessing.OpticalMeasures
|
||||
import org.fairscan.imageprocessing.Point
|
||||
import org.fairscan.imageprocessing.Quad
|
||||
import org.fairscan.imageprocessing.cameraIntrinsics
|
||||
@@ -154,8 +156,11 @@ class ImageRepository(
|
||||
manualRotationDegrees = Rotation.R0.degrees,
|
||||
isColored = metadata.autoColorMode == ColorMode.COLOR,
|
||||
colorMode = colorMode,
|
||||
focalLength = metadata.cameraIntrinsics?.focalLength,
|
||||
sensorWidth = metadata.cameraIntrinsics?.sensorWidth,
|
||||
focalLength = metadata.opticalMeasures?.cameraIntrinsics?.focalLength,
|
||||
sensorWidth = metadata.opticalMeasures?.cameraIntrinsics?.sensorWidth,
|
||||
subjectDistance = metadata.opticalMeasures?.subjectDistance,
|
||||
sourceWidth = metadata.sourceSize?.width?.toInt(),
|
||||
sourceHeight = metadata.sourceSize?.height?.toInt(),
|
||||
)
|
||||
)
|
||||
saveMetadata()
|
||||
@@ -402,10 +407,17 @@ fun NormalizedQuad.toQuad(): Quad =
|
||||
|
||||
/**
 * Converts a persisted [PageV2] record into a [PageMetadata].
 *
 * The user-adjusted quad takes precedence over the detected one. Source size and optical
 * measures are only populated when the record holds the values needed to build them.
 *
 * @return the metadata, or null when the record has no quad or no `isColored` flag
 */
fun PageV2.toMetadata(): PageMetadata? {
    if (quad == null || isColored == null) return null
    // Both dimensions are required to rebuild the source image size.
    val sourceSize =
        if (sourceWidth != null && sourceHeight != null)
            ImageSize(sourceWidth, sourceHeight)
        else
            null
    // Optical measures require camera intrinsics; the subject distance itself may be null.
    val opticalMeasures = cameraIntrinsics(focalLength, sensorWidth)
        ?.let { OpticalMeasures(it, subjectDistance) }
    return PageMetadata(
        (userQuad ?: quad).toQuad(),
        Rotation.fromDegrees(baseRotationDegrees),
        if (isColored) ColorMode.COLOR else ColorMode.GRAYSCALE,
        sourceSize,
        opticalMeasures,
    )
}
|
||||
|
||||
@@ -16,26 +16,55 @@ package org.fairscan.app.domain
|
||||
|
||||
import org.fairscan.app.data.ImageRepository
|
||||
import org.fairscan.app.platform.processedImage
|
||||
import org.fairscan.imageprocessing.EstimatedDimensions
|
||||
import org.fairscan.imageprocessing.estimateRealDimensions
|
||||
import org.fairscan.imageprocessing.resizeForMaxPixels
|
||||
import org.fairscan.imageprocessing.scaledTo
|
||||
import org.opencv.core.Mat
|
||||
|
||||
/**
 * Single-method source of a [Jpeg], obtained through a suspending call.
 */
fun interface JpegProvider {
    /** Returns the [Jpeg] supplied by this provider. */
    suspend fun get(): Jpeg
}
|
||||
|
||||
suspend fun jpegsForExport(
|
||||
/**
 * A page about to be exported: its metadata (when available) and the provider of its JPEG.
 */
data class PageToExport(
    val metadata: PageMetadata?,
    val jpeg: JpegProvider,
) {
    /**
     * Estimates the dimensions of this page from its metadata.
     *
     * @return null when [metadata] or its source size is missing; otherwise the result of
     * [estimateRealDimensions] with the page's base rotation applied
     */
    fun estimatedDimensions(): EstimatedDimensions? {
        val pageMetadata = metadata ?: return null
        val sourceSize = pageMetadata.sourceSize ?: return null
        // Rescale the normalized quad from a 1×1 space to the source image size.
        val sourceQuad = pageMetadata.normalizedQuad
            .scaledTo(1.0, 1.0, sourceSize.width, sourceSize.height)
        return estimateRealDimensions(
            sourceQuad,
            sourceSize.width.toInt(),
            sourceSize.height.toInt(),
            pageMetadata.opticalMeasures,
        ).applyRotation(pageMetadata.baseRotation)
    }
}
|
||||
|
||||
/**
 * Returns these dimensions as they appear after [rotation] is applied to the page.
 *
 * A quarter turn (90° or 270°) swaps width and height for both variants, so that
 * `aspectRatio` describes the rotated page. Previously only `Physical` was swapped.
 * Other rotations return the receiver unchanged.
 */
private fun EstimatedDimensions.applyRotation(rotation: Rotation): EstimatedDimensions {
    val isQuarterTurn = rotation == Rotation.R90 || rotation == Rotation.R270
    if (!isQuarterTurn) return this
    // Exhaustive over the sealed hierarchy: a new variant must decide how it rotates.
    return when (this) {
        is EstimatedDimensions.Physical -> EstimatedDimensions.Physical(heightMm, widthMm)
        is EstimatedDimensions.Ratio -> EstimatedDimensions.Ratio(height, width)
    }
}
|
||||
|
||||
suspend fun pagesToExport(
|
||||
imageRepository: ImageRepository,
|
||||
exportQuality: ExportQuality
|
||||
): List<JpegProvider> {
|
||||
): List<PageToExport> {
|
||||
|
||||
val pages = imageRepository.pages()
|
||||
return when (exportQuality) {
|
||||
ExportQuality.BALANCED -> pages.map {
|
||||
JpegProvider { jpeg(it, imageRepository) }
|
||||
PageToExport(it.metadata) { jpeg(it, imageRepository) }
|
||||
}
|
||||
|
||||
ExportQuality.LOW -> pages.map { page ->
|
||||
JpegProvider {
|
||||
PageToExport(page.metadata) {
|
||||
resizeJpegBytesForMaxPixels(
|
||||
jpeg = jpeg(page, imageRepository),
|
||||
maxPixels = exportQuality.maxPixels.toDouble(),
|
||||
@@ -45,7 +74,7 @@ suspend fun jpegsForExport(
|
||||
}
|
||||
|
||||
ExportQuality.HIGH -> pages.map { page ->
|
||||
JpegProvider {
|
||||
PageToExport(page.metadata) {
|
||||
val source = imageRepository.source(page.id)
|
||||
val metadata = page.metadata
|
||||
val colorMode = page.colorMode
|
||||
|
||||
@@ -14,15 +14,17 @@
|
||||
*/
|
||||
package org.fairscan.app.domain
|
||||
|
||||
import org.fairscan.imageprocessing.CameraIntrinsics
|
||||
import org.fairscan.imageprocessing.ColorMode
|
||||
import org.fairscan.imageprocessing.ImageSize
|
||||
import org.fairscan.imageprocessing.OpticalMeasures
|
||||
import org.fairscan.imageprocessing.Quad
|
||||
|
||||
/**
 * Metadata describing how a scanned page was captured.
 *
 * Camera intrinsics are no longer a separate field: they are carried by [opticalMeasures].
 *
 * @property normalizedQuad document outline; callers rescale it from a 1×1 space to the image size
 * @property baseRotation rotation recorded for the page
 * @property autoColorMode color mode recorded for the page by automatic detection
 * @property sourceSize size of the source image, or null when it was not recorded
 * @property opticalMeasures camera intrinsics with an optional subject distance, or null when unknown
 */
data class PageMetadata(
    val normalizedQuad: Quad,
    val baseRotation: Rotation,
    val autoColorMode: ColorMode,
    val sourceSize: ImageSize?,
    val opticalMeasures: OpticalMeasures?,
)
|
||||
|
||||
data class ScanPage(
|
||||
|
||||
@@ -22,33 +22,38 @@ import com.tom_roush.pdfbox.pdmodel.common.PDRectangle
|
||||
import com.tom_roush.pdfbox.pdmodel.graphics.image.JPEGFactory
|
||||
import org.fairscan.app.BuildConfig
|
||||
import org.fairscan.app.data.PdfWriter
|
||||
import org.fairscan.app.domain.JpegProvider
|
||||
import org.fairscan.app.domain.PageToExport
|
||||
import org.fairscan.imageprocessing.EstimatedDimensions
|
||||
import java.io.OutputStream
|
||||
import java.util.Calendar
|
||||
|
||||
class AndroidPdfWriter : PdfWriter {
|
||||
override suspend fun writePdfFromJpegs(jpegs: List<JpegProvider>, outputStream: OutputStream): Int {
|
||||
override suspend fun writePdfFromJpegs(pages: List<PageToExport>, outputStream: OutputStream): Int {
|
||||
val doc = PDDocument()
|
||||
doc.documentInformation.creationDate = Calendar.getInstance()
|
||||
doc.documentInformation.creator = "FairScan ${BuildConfig.VERSION_NAME}"
|
||||
doc.use { document ->
|
||||
for (jpegBytes in jpegs) {
|
||||
val image = JPEGFactory.createFromByteArray(document, jpegBytes.get().bytes)
|
||||
for (page in pages) {
|
||||
val image = JPEGFactory.createFromByteArray(document, page.jpeg.get().bytes)
|
||||
|
||||
// Let's say that the physical dimensions of the page are close to US Letter
|
||||
// US Letter: 215.9×279.4 mm (A4: 210×297 mm)
|
||||
val maxDimInMm = 279.4f
|
||||
// PDF has 72 points (units) per inch, 1 inch = 25.4 mm
|
||||
val pointsPerMm = 72f / 25.4f
|
||||
|
||||
val widthPx = image.width.toFloat()
|
||||
val heightPx = image.height.toFloat()
|
||||
|
||||
val maxPx = maxOf(widthPx, heightPx)
|
||||
val scalePxToMm = maxDimInMm / maxPx
|
||||
|
||||
val widthPoints = widthPx * scalePxToMm * pointsPerMm
|
||||
val heightPoints = heightPx * scalePxToMm * pointsPerMm
|
||||
val dimensions = page.estimatedDimensions()
|
||||
val (widthPoints, heightPoints) = when (dimensions) {
|
||||
is EstimatedDimensions.Physical -> {
|
||||
dimensions.widthMm.toFloat() * pointsPerMm to dimensions.heightMm.toFloat() * pointsPerMm
|
||||
}
|
||||
else -> {
|
||||
// No physical dimensions available: approximate using US Letter max dimension
|
||||
val maxDimInMm = 279.4f
|
||||
val scalePxToMm = maxDimInMm / maxOf(widthPx, heightPx)
|
||||
widthPx * scalePxToMm * pointsPerMm to heightPx * scalePxToMm * pointsPerMm
|
||||
}
|
||||
}
|
||||
|
||||
val page = PDPage(PDRectangle(widthPoints, heightPoints))
|
||||
document.addPage(page)
|
||||
|
||||
@@ -25,9 +25,10 @@ import org.fairscan.app.domain.Jpeg
|
||||
import org.fairscan.app.domain.PageMetadata
|
||||
import org.fairscan.app.domain.Rotation
|
||||
import org.fairscan.app.ui.screens.settings.DefaultColorMode
|
||||
import org.fairscan.imageprocessing.CameraIntrinsics
|
||||
import org.fairscan.imageprocessing.ColorMode
|
||||
import org.fairscan.imageprocessing.ImageSize
|
||||
import org.fairscan.imageprocessing.Mask
|
||||
import org.fairscan.imageprocessing.OpticalMeasures
|
||||
import org.fairscan.imageprocessing.Point
|
||||
import org.fairscan.imageprocessing.Quad
|
||||
import org.fairscan.imageprocessing.autoColorMode
|
||||
@@ -102,7 +103,7 @@ fun processedImage(
|
||||
sourceMat = source.toMat()
|
||||
val quad = metadata.normalizedQuad.scaledTo(1, 1, sourceMat.width(), sourceMat.height())
|
||||
page = extractDocument(sourceMat, quad, rotationDegrees, colorMode, exportQuality.maxPixels,
|
||||
metadata.cameraIntrinsics)
|
||||
metadata.opticalMeasures)
|
||||
return Jpeg.fromMat(page, exportQuality.jpegQuality)
|
||||
} finally {
|
||||
sourceMat?.release()
|
||||
@@ -117,7 +118,7 @@ fun extractDocumentFromBitmap(
|
||||
mask: Mask?,
|
||||
viewModelScope: CoroutineScope,
|
||||
defaultColorMode: DefaultColorMode = DefaultColorMode.AUTO,
|
||||
cameraIntrinsics: CameraIntrinsics?,
|
||||
opticalMeasures: OpticalMeasures?,
|
||||
): CapturedPage {
|
||||
val exportQuality = ExportQuality.BALANCED
|
||||
var colorMode = ColorMode.COLOR
|
||||
@@ -144,7 +145,7 @@ fun extractDocumentFromBitmap(
|
||||
autoColorMode = autoColorMode(bgr, mask, quad)
|
||||
colorMode = defaultColorMode.colorMode ?: autoColorMode
|
||||
page = extractDocument(bgr, quad, rotationDegrees, colorMode, exportQuality.maxPixels,
|
||||
cameraIntrinsics)
|
||||
opticalMeasures)
|
||||
}
|
||||
|
||||
val pageJpeg = Jpeg.fromMat(page, exportQuality.jpegQuality)
|
||||
@@ -152,7 +153,9 @@ fun extractDocumentFromBitmap(
|
||||
page.release()
|
||||
|
||||
val baseRotation = Rotation.fromDegrees(rotationDegrees)
|
||||
val metadata = PageMetadata(normalizedQuad, baseRotation, autoColorMode, cameraIntrinsics)
|
||||
val sourceSize = ImageSize(source.width, source.height)
|
||||
val metadata =
|
||||
PageMetadata(normalizedQuad, baseRotation, autoColorMode, sourceSize, opticalMeasures)
|
||||
val sourceJpegDeferred = viewModelScope.async(Dispatchers.IO) {
|
||||
compressSource(source)
|
||||
}
|
||||
|
||||
@@ -15,12 +15,18 @@
|
||||
package org.fairscan.app.ui.screens.camera
|
||||
|
||||
import android.graphics.Bitmap
|
||||
import android.hardware.camera2.CameraCaptureSession
|
||||
import android.hardware.camera2.CameraMetadata
|
||||
import android.hardware.camera2.CaptureRequest
|
||||
import android.hardware.camera2.CaptureResult
|
||||
import android.hardware.camera2.TotalCaptureResult
|
||||
import android.util.Log
|
||||
import android.util.Size
|
||||
import android.view.ViewGroup.LayoutParams.MATCH_PARENT
|
||||
import android.widget.LinearLayout
|
||||
import androidx.annotation.OptIn
|
||||
import androidx.camera.camera2.interop.Camera2CameraInfo
|
||||
import androidx.camera.camera2.interop.Camera2Interop
|
||||
import androidx.camera.camera2.interop.ExperimentalCamera2Interop
|
||||
import androidx.camera.core.CameraControl
|
||||
import androidx.camera.core.CameraSelector
|
||||
@@ -69,6 +75,7 @@ import androidx.lifecycle.LifecycleOwner
|
||||
import androidx.lifecycle.compose.LocalLifecycleOwner
|
||||
import org.fairscan.app.ui.components.CameraPermissionState
|
||||
import org.fairscan.imageprocessing.CameraIntrinsics
|
||||
import org.fairscan.imageprocessing.OpticalMeasures
|
||||
import org.fairscan.imageprocessing.Point
|
||||
import org.fairscan.imageprocessing.Quad
|
||||
import org.fairscan.imageprocessing.cameraIntrinsics
|
||||
@@ -193,7 +200,7 @@ fun bindCameraUseCases(
|
||||
.setOutputImageFormat(ImageAnalysis.OUTPUT_IMAGE_FORMAT_RGBA_8888).build()
|
||||
imageAnalysis.setAnalyzer(executor, onImageAnalyzed)
|
||||
|
||||
val imageCapture = ImageCapture.Builder()
|
||||
val imageCaptureBuilder = ImageCapture.Builder()
|
||||
.setResolutionSelector(
|
||||
ResolutionSelector.Builder()
|
||||
.setResolutionStrategy(
|
||||
@@ -208,7 +215,21 @@ fun bindCameraUseCases(
|
||||
.build()
|
||||
)
|
||||
.setCaptureMode(ImageCapture.CAPTURE_MODE_MINIMIZE_LATENCY)
|
||||
.build()
|
||||
|
||||
Camera2Interop.Extender(imageCaptureBuilder)
|
||||
.setSessionCaptureCallback(object : CameraCaptureSession.CaptureCallback() {
|
||||
override fun onCaptureCompleted(
|
||||
session: CameraCaptureSession,
|
||||
request: CaptureRequest,
|
||||
result: TotalCaptureResult
|
||||
) {
|
||||
result.get(CaptureResult.LENS_FOCUS_DISTANCE)?.let {
|
||||
captureController.lastFocusDistanceDiopters = it
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
val imageCapture = imageCaptureBuilder.build()
|
||||
captureController.imageCapture = imageCapture
|
||||
|
||||
val camera = cameraProvider.bindToLifecycle(lifecycleOwner, cameraSelector,
|
||||
@@ -296,21 +317,34 @@ class CameraCaptureController {
|
||||
private val executor = Executors.newSingleThreadExecutor()
|
||||
var previewView: PreviewView? = null
|
||||
var cameraIntrinsics: CameraIntrinsics? = null
|
||||
var canUseFocusDistance = false
|
||||
|
||||
@Volatile
|
||||
var lastFocusDistanceDiopters: Float? = null
|
||||
|
||||
fun shutdown() {
|
||||
executor.shutdown()
|
||||
}
|
||||
|
||||
fun takePicture(onImageCaptured: (ImageProxy?, CameraIntrinsics?) -> Unit) {
|
||||
fun takePicture(onImageCaptured: (ImageProxy?, OpticalMeasures?) -> Unit) {
|
||||
imageCapture?.takePicture(
|
||||
executor,
|
||||
object : ImageCapture.OnImageCapturedCallback() {
|
||||
override fun onCaptureSuccess(imageProxy: ImageProxy) {
|
||||
onImageCaptured(imageProxy, cameraIntrinsics)
|
||||
val diopters = lastFocusDistanceDiopters
|
||||
val subjectDistanceInMm =
|
||||
if (canUseFocusDistance && diopters != null && diopters != 0.0f) {
|
||||
1000 / diopters
|
||||
} else {
|
||||
null
|
||||
}
|
||||
onImageCaptured(
|
||||
imageProxy,
|
||||
cameraIntrinsics?.let { OpticalMeasures(it, subjectDistanceInMm) })
|
||||
}
|
||||
override fun onError(exception: ImageCaptureException) {
|
||||
Log.e("CameraCapture", "Image capture failed: ${exception.message}", exception)
|
||||
onImageCaptured(null, cameraIntrinsics)
|
||||
onImageCaptured(null, null)
|
||||
}
|
||||
}
|
||||
)
|
||||
@@ -344,6 +378,12 @@ class CameraCaptureController {
|
||||
} else {
|
||||
cameraIntrinsics(focalLengths[0], max(sensorSize.width, sensorSize.height))
|
||||
}
|
||||
val calibration = cameraInfo.getCameraCharacteristic(
|
||||
android.hardware.camera2.CameraCharacteristics.LENS_INFO_FOCUS_DISTANCE_CALIBRATION
|
||||
)
|
||||
canUseFocusDistance =
|
||||
calibration == CameraMetadata.LENS_INFO_FOCUS_DISTANCE_CALIBRATION_CALIBRATED
|
||||
|| calibration == CameraMetadata.LENS_INFO_FOCUS_DISTANCE_CALIBRATION_APPROXIMATE
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -229,8 +229,8 @@ fun CameraScreen(
|
||||
Log.i("FairScan", "Pressed <Capture>")
|
||||
cameraViewModel.onCapturePressed(it)
|
||||
captureController.takePicture(
|
||||
onImageCaptured = { imageProxy, cameraCharacteristics ->
|
||||
cameraViewModel.onImageCaptured(imageProxy, cameraCharacteristics) }
|
||||
onImageCaptured = { imageProxy, opticalMeasures ->
|
||||
cameraViewModel.onImageCaptured(imageProxy, opticalMeasures) }
|
||||
)
|
||||
}
|
||||
},
|
||||
@@ -648,7 +648,7 @@ fun CameraScreenPreviewWithProcessedImage() {
|
||||
CapturedPage(
|
||||
debugImage("gallica.bnf.fr-bpt6k5530456s-1.jpg"),
|
||||
CompletableDeferred(Jpeg(ByteArray(0))),
|
||||
PageMetadata(quad, R0, ColorMode.COLOR, null),
|
||||
PageMetadata(quad, R0, ColorMode.COLOR, null, null),
|
||||
ColorMode.COLOR)))
|
||||
}
|
||||
|
||||
|
||||
@@ -36,6 +36,7 @@ import org.fairscan.app.domain.CapturedPage
|
||||
import org.fairscan.app.platform.extractDocumentFromBitmap
|
||||
import org.fairscan.imageprocessing.CameraIntrinsics
|
||||
import org.fairscan.imageprocessing.ImageSize
|
||||
import org.fairscan.imageprocessing.OpticalMeasures
|
||||
import org.fairscan.imageprocessing.detectDocumentQuad
|
||||
import java.util.concurrent.CancellationException
|
||||
|
||||
@@ -134,13 +135,13 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
|
||||
}
|
||||
}
|
||||
|
||||
fun onImageCaptured(imageProxy: ImageProxy?, cameraIntrinsics: CameraIntrinsics?) {
|
||||
fun onImageCaptured(imageProxy: ImageProxy?, opticalMeasures: OpticalMeasures?) {
|
||||
if (imageProxy != null) {
|
||||
viewModelScope.launch {
|
||||
try {
|
||||
val source = imageProxy.toBitmap()
|
||||
val rotationDegrees = imageProxy.imageInfo.rotationDegrees
|
||||
val page = processCapturedImage(source, rotationDegrees, cameraIntrinsics)
|
||||
val page = processCapturedImage(source, rotationDegrees, opticalMeasures)
|
||||
imageProxy.close()
|
||||
onCaptureProcessed(page)
|
||||
} catch (e: RuntimeException) {
|
||||
@@ -156,7 +157,7 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
|
||||
private suspend fun processCapturedImage(
|
||||
source: Bitmap,
|
||||
rotationDegrees: Int,
|
||||
cameraIntrinsics: CameraIntrinsics?,
|
||||
opticalMeasures: OpticalMeasures?,
|
||||
): CapturedPage = withContext(Dispatchers.IO) {
|
||||
val segmentation = imageSegmentationService.runSegmentationAndReturn(source)
|
||||
val mask = segmentation?.segmentation
|
||||
@@ -164,7 +165,7 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
|
||||
val quad = mask?.let { detectDocumentQuad(mask, originalSize, isLiveAnalysis = false) }
|
||||
val defaultColorMode = settingsRepository.defaultColorMode.first()
|
||||
val result = extractDocumentFromBitmap(
|
||||
source, quad, rotationDegrees, mask, viewModelScope, defaultColorMode, cameraIntrinsics)
|
||||
source, quad, rotationDegrees, mask, viewModelScope, defaultColorMode, opticalMeasures)
|
||||
return@withContext result
|
||||
}
|
||||
|
||||
|
||||
@@ -46,7 +46,7 @@ import org.fairscan.app.data.FileManager
|
||||
import org.fairscan.app.data.ImageRepository
|
||||
import org.fairscan.app.domain.ExportQuality
|
||||
import org.fairscan.app.domain.PageViewKey
|
||||
import org.fairscan.app.domain.jpegsForExport
|
||||
import org.fairscan.app.domain.pagesToExport
|
||||
import org.fairscan.app.ui.screens.settings.ExportFormat
|
||||
import java.io.File
|
||||
import java.io.FileInputStream
|
||||
@@ -76,8 +76,8 @@ class ExportViewModel(container: AppContainer, val imageRepository: ImageReposit
|
||||
/**
 * Builds a PDF from the repository's pages at the requested [exportQuality].
 *
 * Runs on [Dispatchers.IO]. The pages are gathered once through [pagesToExport] and the
 * PDF is generated once; the result exposes the file, its size in bytes and its page count.
 */
private suspend fun generatePdf(
    exportQuality: ExportQuality
): ExportResult.Pdf = withContext(Dispatchers.IO) {
    val pages = pagesToExport(imageRepository, exportQuality)
    val pdf = fileManager.generatePdf(pages)
    ExportResult.Pdf(pdf.file, pdf.sizeInBytes, pdf.pageCount)
}
|
||||
|
||||
@@ -181,12 +181,12 @@ class ExportViewModel(container: AppContainer, val imageRepository: ImageReposit
|
||||
private suspend fun generateJpegs(
|
||||
exportQuality: ExportQuality
|
||||
): ExportResult.Jpeg = withContext(Dispatchers.IO) {
|
||||
val jpegs = jpegsForExport(imageRepository, exportQuality)
|
||||
val pageToExports = pagesToExport(imageRepository, exportQuality)
|
||||
val timestamp = System.currentTimeMillis()
|
||||
preparationDir.mkdirs()
|
||||
val files = jpegs.mapIndexed { index, jpeg ->
|
||||
val files = pageToExports.mapIndexed { index, page ->
|
||||
val file = File(preparationDir, "$timestamp-${index + 1}.jpg")
|
||||
file.writeBytes(jpeg.get().bytes)
|
||||
file.writeBytes(page.jpeg.get().bytes)
|
||||
file
|
||||
}.toList()
|
||||
val sizeInBytes = files.sumOf { it.length() }
|
||||
|
||||
@@ -17,7 +17,7 @@ package org.fairscan.app.data
|
||||
import kotlinx.coroutines.test.runTest
|
||||
import org.assertj.core.api.Assertions.assertThat
|
||||
import org.fairscan.app.domain.Jpeg
|
||||
import org.fairscan.app.domain.JpegProvider
|
||||
import org.fairscan.app.domain.PageToExport
|
||||
import org.junit.Test
|
||||
import java.io.File
|
||||
import java.io.OutputStream
|
||||
@@ -73,15 +73,16 @@ class FileManagerTest {
|
||||
@Test
|
||||
fun generatePdf() = runTest {
|
||||
val fakePdfWriter = object : PdfWriter {
|
||||
override suspend fun writePdfFromJpegs(jpegs: List<JpegProvider>, outputStream: OutputStream): Int {
|
||||
val list = jpegs.toList()
|
||||
list.forEach { bytes -> outputStream.write(bytes.get().bytes) }
|
||||
override suspend fun writePdfFromJpegs(pages: List<PageToExport>, outputStream: OutputStream): Int {
|
||||
val list = pages.toList()
|
||||
list.forEach { page -> outputStream.write(page.jpeg.get().bytes) }
|
||||
return list.size
|
||||
}
|
||||
}
|
||||
val manager = FileManager(pdfDir, externalDir, fakePdfWriter)
|
||||
val jpegs = listOf(byteArrayOf(0x01, 0x02), byteArrayOf(0x11)).map { JpegProvider { Jpeg(it) } }
|
||||
val pdf = manager.generatePdf(jpegs)
|
||||
val pages = listOf(byteArrayOf(0x01, 0x02), byteArrayOf(0x11))
|
||||
.map { PageToExport(null) { Jpeg(it) } }
|
||||
val pdf = manager.generatePdf(pages)
|
||||
assertThat(pdf.pageCount).isEqualTo(2)
|
||||
assertThat(pdf.sizeInBytes).isEqualTo(3)
|
||||
assertThat(pdf.file.readBytes()).isEqualTo(byteArrayOf(0x01, 0x02, 0x11))
|
||||
|
||||
@@ -26,7 +26,6 @@ import org.assertj.core.api.Assertions.assertThat
|
||||
import org.fairscan.app.domain.Jpeg
|
||||
import org.fairscan.app.domain.PageMetadata
|
||||
import org.fairscan.app.domain.PageViewKey
|
||||
import org.fairscan.app.domain.Rotation
|
||||
import org.fairscan.app.domain.Rotation.R0
|
||||
import org.fairscan.app.domain.Rotation.R180
|
||||
import org.fairscan.app.domain.Rotation.R270
|
||||
@@ -35,6 +34,8 @@ import org.fairscan.imageprocessing.CameraIntrinsics
|
||||
import org.fairscan.imageprocessing.ColorMode
|
||||
import org.fairscan.imageprocessing.ColorMode.COLOR
|
||||
import org.fairscan.imageprocessing.ColorMode.GRAYSCALE
|
||||
import org.fairscan.imageprocessing.ImageSize
|
||||
import org.fairscan.imageprocessing.OpticalMeasures
|
||||
import org.fairscan.imageprocessing.Point
|
||||
import org.fairscan.imageprocessing.Quad
|
||||
import org.junit.Rule
|
||||
@@ -52,8 +53,9 @@ class ImageRepositoryTest {
|
||||
private val testScope = TestScope()
|
||||
|
||||
val quad1 = Quad(Point(.01, .02), Point(.1, .03), Point(.11, .12), Point(.03, .09))
|
||||
val intrinsics = CameraIntrinsics(42.0f, 43.0f)
|
||||
val metadata1 = PageMetadata(quad1, R90, COLOR, intrinsics)
|
||||
val opticalMeasures = OpticalMeasures(CameraIntrinsics(42.0f, 43.0f), 44.0f)
|
||||
val sourceSize = ImageSize(1600, 1200)
|
||||
val metadata1 = PageMetadata(quad1, R90, COLOR, sourceSize, opticalMeasures)
|
||||
|
||||
fun getFilesDir(): File {
|
||||
if (_filesDir == null) {
|
||||
|
||||
@@ -9,7 +9,7 @@ espressoCore = "3.7.0"
|
||||
lifecycleRuntimeKtx = "2.10.0"
|
||||
activityCompose = "1.13.0"
|
||||
composeBom = "2026.03.00"
|
||||
camerax = "1.5.3"
|
||||
camerax = "1.6.1"
|
||||
datastore = "1.2.1"
|
||||
documentfile = "1.1.0"
|
||||
litert = "1.4.1"
|
||||
|
||||
@@ -25,6 +25,7 @@ import org.opencv.core.MatOfPoint2f
|
||||
import org.opencv.core.Size
|
||||
import org.opencv.imgproc.Imgproc
|
||||
import kotlin.math.abs
|
||||
import kotlin.math.sqrt
|
||||
|
||||
interface Mask {
|
||||
val width: Int
|
||||
@@ -156,14 +157,15 @@ fun extractDocument(
|
||||
rotationDegrees: Int,
|
||||
colorMode: ColorMode,
|
||||
maxPixels: Long,
|
||||
cameraIntrinsics: CameraIntrinsics? = null,
|
||||
opticalMeasures: OpticalMeasures? = null,
|
||||
): Mat {
|
||||
val (targetWidth, targetHeight) = estimateRealDimensions(
|
||||
val estimatedDimensions = estimateRealDimensions(
|
||||
quad,
|
||||
inputMat.cols(),
|
||||
inputMat.rows(),
|
||||
cameraIntrinsics
|
||||
opticalMeasures,
|
||||
)
|
||||
val (targetWidth, targetHeight) = estimatedDimensions.toPixelDimensions(quad)
|
||||
val srcPoints = MatOfPoint2f(
|
||||
quad.topLeft.toCv(),
|
||||
quad.topRight.toCv(),
|
||||
@@ -193,6 +195,17 @@ fun extractDocument(
|
||||
return rotated
|
||||
}
|
||||
|
||||
/**
 * Derives output pixel dimensions for [quad] from these estimated dimensions.
 *
 * The result has the same area as the product of the quad's averaged opposite sides,
 * with height / width equal to this estimate's [aspectRatio].
 *
 * @return a pair of (width, height)
 */
fun EstimatedDimensions.toPixelDimensions(quad: Quad): Pair<Double, Double> {
    // Mean length of the two horizontal sides and of the two vertical sides.
    val meanHorizontal = (norm(quad.topLeft, quad.topRight) + norm(quad.bottomLeft, quad.bottomRight)) / 2
    val meanVertical = (norm(quad.topLeft, quad.bottomLeft) + norm(quad.topRight, quad.bottomRight)) / 2

    val heightOverWidth = aspectRatio
    val outputWidth = sqrt(meanHorizontal * meanVertical / heightOverWidth)
    return Pair(outputWidth, outputWidth * heightOverWidth)
}
|
||||
|
||||
fun rotate(input: Mat, degrees: Int): Mat {
|
||||
val output = Mat()
|
||||
when ((degrees % 360 + 360) % 360) {
|
||||
|
||||
@@ -47,6 +47,24 @@ fun cameraIntrinsics(focalLengthInMm: Float?, sensorWidthInMm: Float?): CameraIn
|
||||
return CameraIntrinsics(focalLengthInMm, sensorWidthInMm)
|
||||
}
|
||||
|
||||
/**
 * Optical measurements associated with a capture.
 *
 * @property cameraIntrinsics intrinsics of the camera
 * @property subjectDistance distance to the subject in millimeters, or null when unavailable
 */
data class OpticalMeasures(
    val cameraIntrinsics: CameraIntrinsics,
    val subjectDistance: Float?,
)
|
||||
|
||||
/**
 * Estimated dimensions of a document, either physical or known only up to scale.
 */
sealed class EstimatedDimensions {
    /** Dimensions in millimeters, used when the subject distance is available. */
    data class Physical(val widthMm: Double, val heightMm: Double) : EstimatedDimensions()

    /** Dimensions in arbitrary units; only their ratio is meaningful. */
    data class Ratio(val width: Double, val height: Double) : EstimatedDimensions()

    /** Height divided by width, for either variant. */
    val aspectRatio: Double
        get() {
            return when (this) {
                is Physical -> heightMm / widthMm
                is Ratio -> height / width
            }
        }
}
|
||||
|
||||
/**
|
||||
* Estimates the true width and height of the document in the output image,
|
||||
* correcting for perspective distortion using projective geometry.
|
||||
@@ -63,13 +81,13 @@ fun estimateRealDimensions(
|
||||
quad: Quad,
|
||||
imageWidth: Int,
|
||||
imageHeight: Int,
|
||||
cameraIntrinsics: CameraIntrinsics?
|
||||
): Pair<Double, Double> {
|
||||
opticalMeasures: OpticalMeasures?,
|
||||
): EstimatedDimensions {
|
||||
|
||||
fun averageSides(): Pair<Double, Double> {
|
||||
fun averageSides(): EstimatedDimensions.Ratio {
|
||||
val w = (norm(quad.topLeft, quad.topRight) + norm(quad.bottomLeft, quad.bottomRight)) / 2
|
||||
val h = (norm(quad.topLeft, quad.bottomLeft) + norm(quad.topRight, quad.bottomRight)) / 2
|
||||
return Pair(w, h)
|
||||
return EstimatedDimensions.Ratio(w, h)
|
||||
}
|
||||
|
||||
// Homogeneous 2D point
|
||||
@@ -97,8 +115,9 @@ fun estimateRealDimensions(
|
||||
val v1 = Point(v1h.x / v1h.z - cx, v1h.y / v1h.z - cy)
|
||||
val v2 = Point(v2h.x / v2h.z - cx, v2h.y / v2h.z - cy)
|
||||
|
||||
val f = if (cameraIntrinsics != null) {
|
||||
cameraIntrinsics.focalLengthInPixels(max(imageWidth, imageHeight)).toDouble()
|
||||
val f = if (opticalMeasures != null) {
|
||||
opticalMeasures.cameraIntrinsics
|
||||
.focalLengthInPixels(max(imageWidth, imageHeight)).toDouble()
|
||||
} else {
|
||||
// Focal length estimated assuming zero skew and principal point at image center.
|
||||
// Under these assumptions, the Image of the Absolute Conic (IAC) simplifies,
|
||||
@@ -131,26 +150,37 @@ fun estimateRealDimensions(
|
||||
// Camera ray through a corner: K⁻¹ · (u, v, 1)
|
||||
fun ray(p: Point) = Vector3D((p.x - cx) / f, (p.y - cy) / f, 1.0)
|
||||
|
||||
// Intersect ray with document plane: X = t·r where t = 1 / (n·r)
|
||||
// We assume an arbitrary plane distance (d = 1). Absolute scale is wrong,
|
||||
// but cancels out when computing length ratios.
|
||||
// Scale factor: either from subject distance, or arbitrary (ratio only)
|
||||
val subjectDistance = opticalMeasures?.subjectDistance?.toDouble()
|
||||
val scale: Double? = if (subjectDistance != null) {
|
||||
// Project subject distance onto the plane normal to get perpendicular distance
|
||||
val centerX = (quad.topLeft.x + quad.topRight.x + quad.bottomLeft.x + quad.bottomRight.x) / 4.0
|
||||
val centerY = (quad.topLeft.y + quad.topRight.y + quad.bottomLeft.y + quad.bottomRight.y) / 4.0
|
||||
val centerRay = ray(Point(centerX, centerY)).let { it * (1.0 / it.norm()) }
|
||||
val cosAngle = centerRay.dotProduct(n).absoluteValue
|
||||
if (cosAngle < 0.1) null // document too tilted, unreliable
|
||||
else subjectDistance * cosAngle
|
||||
} else null
|
||||
|
||||
// Intersect ray with document plane: X = t·r where t = d / (n·r)
|
||||
// When subjectDistance is unavailable, we assume an arbitrary plane distance (d = 1): absolute
|
||||
// scale is wrong, but cancels out when computing length ratios.
|
||||
fun corner3D(p: Point): Vector3D {
|
||||
val r = ray(p)
|
||||
return r * (1.0 / n.dotProduct(r))
|
||||
val t = if (scale != null) scale / n.dotProduct(r) else 1.0 / n.dotProduct(r)
|
||||
return r * t
|
||||
}
|
||||
|
||||
val xTL = corner3D(quad.topLeft); val xTR = corner3D(quad.topRight)
|
||||
val xBR = corner3D(quad.bottomRight); val xBL = corner3D(quad.bottomLeft)
|
||||
|
||||
// Side lengths in reconstructed 3D space (up to an unknown global scale)
|
||||
// Side lengths in reconstructed 3D space
|
||||
val realW = ((xTR - xTL).norm() + (xBR - xBL).norm()) / 2
|
||||
val realH = ((xBL - xTL).norm() + (xBR - xTR).norm()) / 2
|
||||
|
||||
// Output dimensions: preserve projected area, apply corrected aspect ratio
|
||||
val ratio = realH / realW
|
||||
val (projW, projH) = averageSides()
|
||||
val targetWidth = sqrt(projW * projH / ratio)
|
||||
val targetHeight = targetWidth * ratio
|
||||
|
||||
return Pair(targetWidth, targetHeight)
|
||||
return if (opticalMeasures != null && scale != null) {
|
||||
EstimatedDimensions.Physical(realW, realH)
|
||||
} else {
|
||||
EstimatedDimensions.Ratio(realW, realH)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user