Use focus distance to estimate physical size
This commit is contained in:
@@ -15,7 +15,6 @@
|
|||||||
package org.fairscan.app.data
|
package org.fairscan.app.data
|
||||||
|
|
||||||
import kotlinx.serialization.Serializable
|
import kotlinx.serialization.Serializable
|
||||||
import org.fairscan.imageprocessing.CameraIntrinsics
|
|
||||||
import org.fairscan.imageprocessing.ColorMode
|
import org.fairscan.imageprocessing.ColorMode
|
||||||
|
|
||||||
@Serializable
|
@Serializable
|
||||||
@@ -47,6 +46,9 @@ data class PageV2(
|
|||||||
val colorMode: ColorMode? = null,
|
val colorMode: ColorMode? = null,
|
||||||
val focalLength: Float? = null,
|
val focalLength: Float? = null,
|
||||||
val sensorWidth: Float? = null,
|
val sensorWidth: Float? = null,
|
||||||
|
val subjectDistance: Float? = null,
|
||||||
|
val sourceWidth: Int? = null,
|
||||||
|
val sourceHeight: Int? = null,
|
||||||
)
|
)
|
||||||
|
|
||||||
@Serializable
|
@Serializable
|
||||||
|
|||||||
@@ -14,7 +14,7 @@
|
|||||||
*/
|
*/
|
||||||
package org.fairscan.app.data
|
package org.fairscan.app.data
|
||||||
|
|
||||||
import org.fairscan.app.domain.JpegProvider
|
import org.fairscan.app.domain.PageToExport
|
||||||
import java.io.File
|
import java.io.File
|
||||||
import java.io.FileOutputStream
|
import java.io.FileOutputStream
|
||||||
import java.io.OutputStream
|
import java.io.OutputStream
|
||||||
@@ -26,7 +26,7 @@ data class GeneratedPdf(
|
|||||||
)
|
)
|
||||||
|
|
||||||
fun interface PdfWriter {
|
fun interface PdfWriter {
|
||||||
suspend fun writePdfFromJpegs(jpegs: List<JpegProvider>, outputStream: OutputStream): Int
|
suspend fun writePdfFromJpegs(pages: List<PageToExport>, outputStream: OutputStream): Int
|
||||||
}
|
}
|
||||||
|
|
||||||
class FileManager(
|
class FileManager(
|
||||||
@@ -43,12 +43,12 @@ class FileManager(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
suspend fun generatePdf(jpegs: List<JpegProvider>): GeneratedPdf {
|
suspend fun generatePdf(pages: List<PageToExport>): GeneratedPdf {
|
||||||
pdfDir.mkdirs()
|
pdfDir.mkdirs()
|
||||||
require(pdfDir.exists() && pdfDir.isDirectory) { "Invalid pdfDir: $pdfDir" }
|
require(pdfDir.exists() && pdfDir.isDirectory) { "Invalid pdfDir: $pdfDir" }
|
||||||
val file = File(pdfDir, "${System.currentTimeMillis()}.pdf")
|
val file = File(pdfDir, "${System.currentTimeMillis()}.pdf")
|
||||||
val pageCount = FileOutputStream(file).use {
|
val pageCount = FileOutputStream(file).use {
|
||||||
pdfWriter.writePdfFromJpegs(jpegs, it)
|
pdfWriter.writePdfFromJpegs(pages, it)
|
||||||
}
|
}
|
||||||
val sizeBytes = file.length()
|
val sizeBytes = file.length()
|
||||||
return GeneratedPdf(file, sizeBytes, pageCount)
|
return GeneratedPdf(file, sizeBytes, pageCount)
|
||||||
|
|||||||
@@ -33,6 +33,8 @@ import org.fairscan.app.domain.PageViewKey
|
|||||||
import org.fairscan.app.domain.Rotation
|
import org.fairscan.app.domain.Rotation
|
||||||
import org.fairscan.app.domain.ScanPage
|
import org.fairscan.app.domain.ScanPage
|
||||||
import org.fairscan.imageprocessing.ColorMode
|
import org.fairscan.imageprocessing.ColorMode
|
||||||
|
import org.fairscan.imageprocessing.ImageSize
|
||||||
|
import org.fairscan.imageprocessing.OpticalMeasures
|
||||||
import org.fairscan.imageprocessing.Point
|
import org.fairscan.imageprocessing.Point
|
||||||
import org.fairscan.imageprocessing.Quad
|
import org.fairscan.imageprocessing.Quad
|
||||||
import org.fairscan.imageprocessing.cameraIntrinsics
|
import org.fairscan.imageprocessing.cameraIntrinsics
|
||||||
@@ -154,8 +156,11 @@ class ImageRepository(
|
|||||||
manualRotationDegrees = Rotation.R0.degrees,
|
manualRotationDegrees = Rotation.R0.degrees,
|
||||||
isColored = metadata.autoColorMode == ColorMode.COLOR,
|
isColored = metadata.autoColorMode == ColorMode.COLOR,
|
||||||
colorMode = colorMode,
|
colorMode = colorMode,
|
||||||
focalLength = metadata.cameraIntrinsics?.focalLength,
|
focalLength = metadata.opticalMeasures?.cameraIntrinsics?.focalLength,
|
||||||
sensorWidth = metadata.cameraIntrinsics?.sensorWidth,
|
sensorWidth = metadata.opticalMeasures?.cameraIntrinsics?.sensorWidth,
|
||||||
|
subjectDistance = metadata.opticalMeasures?.subjectDistance,
|
||||||
|
sourceWidth = metadata.sourceSize?.width?.toInt(),
|
||||||
|
sourceHeight = metadata.sourceSize?.height?.toInt(),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
saveMetadata()
|
saveMetadata()
|
||||||
@@ -402,10 +407,17 @@ fun NormalizedQuad.toQuad(): Quad =
|
|||||||
|
|
||||||
fun PageV2.toMetadata(): PageMetadata? {
|
fun PageV2.toMetadata(): PageMetadata? {
|
||||||
if (quad == null || isColored == null) return null
|
if (quad == null || isColored == null) return null
|
||||||
|
val cameraIntrinsics = cameraIntrinsics(focalLength, sensorWidth)
|
||||||
|
val sourceSize =
|
||||||
|
if (sourceWidth != null && sourceHeight != null)
|
||||||
|
ImageSize(sourceWidth, sourceHeight)
|
||||||
|
else
|
||||||
|
null
|
||||||
return PageMetadata(
|
return PageMetadata(
|
||||||
(userQuad ?: quad).toQuad(),
|
(userQuad ?: quad).toQuad(),
|
||||||
Rotation.fromDegrees(baseRotationDegrees),
|
Rotation.fromDegrees(baseRotationDegrees),
|
||||||
if (isColored) ColorMode.COLOR else ColorMode.GRAYSCALE,
|
if (isColored) ColorMode.COLOR else ColorMode.GRAYSCALE,
|
||||||
cameraIntrinsics(focalLength, sensorWidth)
|
sourceSize,
|
||||||
|
cameraIntrinsics?.let { OpticalMeasures(it, subjectDistance) },
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -16,26 +16,55 @@ package org.fairscan.app.domain
|
|||||||
|
|
||||||
import org.fairscan.app.data.ImageRepository
|
import org.fairscan.app.data.ImageRepository
|
||||||
import org.fairscan.app.platform.processedImage
|
import org.fairscan.app.platform.processedImage
|
||||||
|
import org.fairscan.imageprocessing.EstimatedDimensions
|
||||||
|
import org.fairscan.imageprocessing.estimateRealDimensions
|
||||||
import org.fairscan.imageprocessing.resizeForMaxPixels
|
import org.fairscan.imageprocessing.resizeForMaxPixels
|
||||||
|
import org.fairscan.imageprocessing.scaledTo
|
||||||
import org.opencv.core.Mat
|
import org.opencv.core.Mat
|
||||||
|
|
||||||
fun interface JpegProvider {
|
fun interface JpegProvider {
|
||||||
suspend fun get(): Jpeg
|
suspend fun get(): Jpeg
|
||||||
}
|
}
|
||||||
|
|
||||||
suspend fun jpegsForExport(
|
/**
 * A page handed to an exporter: the page's optional [PageMetadata] together with
 * the [JpegProvider] that yields its JPEG.
 */
data class PageToExport(
|
||||||
|
val metadata: PageMetadata?,
|
||||||
|
val jpeg: JpegProvider,
|
||||||
|
) {
|
||||||
|
/**
 * Estimates the dimensions of the page from its metadata.
 *
 * Returns null when [metadata] is null or when it carries no source size.
 * Otherwise the quad is scaled to the source size, passed to
 * `estimateRealDimensions` with the optical measures, and the result is
 * adjusted for the page's base rotation.
 */
fun estimatedDimensions(): EstimatedDimensions? {
|
||||||
|
if (metadata == null)
|
||||||
|
return null
|
||||||
|
val size = metadata.sourceSize
|
||||||
|
if (size == null)
|
||||||
|
return null
|
||||||
|
// Rescale the quad from a 1.0 x 1.0 reference to the source image size.
// NOTE(review): assumes normalizedQuad is expressed in unit-square coordinates - confirm.
val quad = metadata.normalizedQuad.scaledTo(1.0, 1.0, size.width, size.height)
|
||||||
|
val realDimensions = estimateRealDimensions(
|
||||||
|
quad, size.width.toInt(), size.height.toInt(), metadata.opticalMeasures
|
||||||
|
)
|
||||||
|
// Only baseRotation is applied here; applyRotation swaps width and height
// of a Physical estimate for R90 / R270.
return realDimensions.applyRotation(metadata.baseRotation)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Adjusts estimated dimensions for a page rotation.
 *
 * When [rotation] is R90 or R270 and the receiver is [EstimatedDimensions.Physical],
 * returns a new Physical built from the receiver's heightMm and widthMm in that
 * order. In every other case the receiver is returned unchanged.
 */
private fun EstimatedDimensions.applyRotation(rotation: Rotation): EstimatedDimensions {
|
||||||
|
if ((rotation == Rotation.R90 || rotation == Rotation.R270)
|
||||||
|
&& this is EstimatedDimensions.Physical) {
|
||||||
|
// NOTE(review): swapping relies on Physical's constructor taking (widthMm, heightMm) - confirm.
return EstimatedDimensions.Physical(heightMm, widthMm)
|
||||||
|
}
|
||||||
|
return this
|
||||||
|
}
|
||||||
|
|
||||||
|
suspend fun pagesToExport(
|
||||||
imageRepository: ImageRepository,
|
imageRepository: ImageRepository,
|
||||||
exportQuality: ExportQuality
|
exportQuality: ExportQuality
|
||||||
): List<JpegProvider> {
|
): List<PageToExport> {
|
||||||
|
|
||||||
val pages = imageRepository.pages()
|
val pages = imageRepository.pages()
|
||||||
return when (exportQuality) {
|
return when (exportQuality) {
|
||||||
ExportQuality.BALANCED -> pages.map {
|
ExportQuality.BALANCED -> pages.map {
|
||||||
JpegProvider { jpeg(it, imageRepository) }
|
PageToExport(it.metadata) { jpeg(it, imageRepository) }
|
||||||
}
|
}
|
||||||
|
|
||||||
ExportQuality.LOW -> pages.map { page ->
|
ExportQuality.LOW -> pages.map { page ->
|
||||||
JpegProvider {
|
PageToExport(page.metadata) {
|
||||||
resizeJpegBytesForMaxPixels(
|
resizeJpegBytesForMaxPixels(
|
||||||
jpeg = jpeg(page, imageRepository),
|
jpeg = jpeg(page, imageRepository),
|
||||||
maxPixels = exportQuality.maxPixels.toDouble(),
|
maxPixels = exportQuality.maxPixels.toDouble(),
|
||||||
@@ -45,7 +74,7 @@ suspend fun jpegsForExport(
|
|||||||
}
|
}
|
||||||
|
|
||||||
ExportQuality.HIGH -> pages.map { page ->
|
ExportQuality.HIGH -> pages.map { page ->
|
||||||
JpegProvider {
|
PageToExport(page.metadata) {
|
||||||
val source = imageRepository.source(page.id)
|
val source = imageRepository.source(page.id)
|
||||||
val metadata = page.metadata
|
val metadata = page.metadata
|
||||||
val colorMode = page.colorMode
|
val colorMode = page.colorMode
|
||||||
|
|||||||
@@ -14,15 +14,17 @@
|
|||||||
*/
|
*/
|
||||||
package org.fairscan.app.domain
|
package org.fairscan.app.domain
|
||||||
|
|
||||||
import org.fairscan.imageprocessing.CameraIntrinsics
|
|
||||||
import org.fairscan.imageprocessing.ColorMode
|
import org.fairscan.imageprocessing.ColorMode
|
||||||
|
import org.fairscan.imageprocessing.ImageSize
|
||||||
|
import org.fairscan.imageprocessing.OpticalMeasures
|
||||||
import org.fairscan.imageprocessing.Quad
|
import org.fairscan.imageprocessing.Quad
|
||||||
|
|
||||||
data class PageMetadata(
|
data class PageMetadata(
|
||||||
val normalizedQuad: Quad,
|
val normalizedQuad: Quad,
|
||||||
val baseRotation: Rotation,
|
val baseRotation: Rotation,
|
||||||
val autoColorMode: ColorMode,
|
val autoColorMode: ColorMode,
|
||||||
val cameraIntrinsics: CameraIntrinsics?,
|
val sourceSize: ImageSize?,
|
||||||
|
val opticalMeasures: OpticalMeasures?,
|
||||||
)
|
)
|
||||||
|
|
||||||
data class ScanPage(
|
data class ScanPage(
|
||||||
|
|||||||
@@ -22,33 +22,38 @@ import com.tom_roush.pdfbox.pdmodel.common.PDRectangle
|
|||||||
import com.tom_roush.pdfbox.pdmodel.graphics.image.JPEGFactory
|
import com.tom_roush.pdfbox.pdmodel.graphics.image.JPEGFactory
|
||||||
import org.fairscan.app.BuildConfig
|
import org.fairscan.app.BuildConfig
|
||||||
import org.fairscan.app.data.PdfWriter
|
import org.fairscan.app.data.PdfWriter
|
||||||
import org.fairscan.app.domain.JpegProvider
|
import org.fairscan.app.domain.PageToExport
|
||||||
|
import org.fairscan.imageprocessing.EstimatedDimensions
|
||||||
import java.io.OutputStream
|
import java.io.OutputStream
|
||||||
import java.util.Calendar
|
import java.util.Calendar
|
||||||
|
|
||||||
class AndroidPdfWriter : PdfWriter {
|
class AndroidPdfWriter : PdfWriter {
|
||||||
override suspend fun writePdfFromJpegs(jpegs: List<JpegProvider>, outputStream: OutputStream): Int {
|
override suspend fun writePdfFromJpegs(pages: List<PageToExport>, outputStream: OutputStream): Int {
|
||||||
val doc = PDDocument()
|
val doc = PDDocument()
|
||||||
doc.documentInformation.creationDate = Calendar.getInstance()
|
doc.documentInformation.creationDate = Calendar.getInstance()
|
||||||
doc.documentInformation.creator = "FairScan ${BuildConfig.VERSION_NAME}"
|
doc.documentInformation.creator = "FairScan ${BuildConfig.VERSION_NAME}"
|
||||||
doc.use { document ->
|
doc.use { document ->
|
||||||
for (jpegBytes in jpegs) {
|
for (page in pages) {
|
||||||
val image = JPEGFactory.createFromByteArray(document, jpegBytes.get().bytes)
|
val image = JPEGFactory.createFromByteArray(document, page.jpeg.get().bytes)
|
||||||
|
|
||||||
// Let's say that the physical dimensions of the page are close to US Letter
|
|
||||||
// US Letter: 215.9×279.4 mm (A4: 210×297 mm)
|
|
||||||
val maxDimInMm = 279.4f
|
|
||||||
// PDF has 72 points (units) per inch, 1 inch = 25.4 mm
|
// PDF has 72 points (units) per inch, 1 inch = 25.4 mm
|
||||||
val pointsPerMm = 72f / 25.4f
|
val pointsPerMm = 72f / 25.4f
|
||||||
|
|
||||||
val widthPx = image.width.toFloat()
|
val widthPx = image.width.toFloat()
|
||||||
val heightPx = image.height.toFloat()
|
val heightPx = image.height.toFloat()
|
||||||
|
|
||||||
val maxPx = maxOf(widthPx, heightPx)
|
val dimensions = page.estimatedDimensions()
|
||||||
val scalePxToMm = maxDimInMm / maxPx
|
val (widthPoints, heightPoints) = when (dimensions) {
|
||||||
|
is EstimatedDimensions.Physical -> {
|
||||||
val widthPoints = widthPx * scalePxToMm * pointsPerMm
|
dimensions.widthMm.toFloat() * pointsPerMm to dimensions.heightMm.toFloat() * pointsPerMm
|
||||||
val heightPoints = heightPx * scalePxToMm * pointsPerMm
|
}
|
||||||
|
else -> {
|
||||||
|
// No physical dimensions available: approximate using US Letter max dimension
|
||||||
|
val maxDimInMm = 279.4f
|
||||||
|
val scalePxToMm = maxDimInMm / maxOf(widthPx, heightPx)
|
||||||
|
widthPx * scalePxToMm * pointsPerMm to heightPx * scalePxToMm * pointsPerMm
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
val page = PDPage(PDRectangle(widthPoints, heightPoints))
|
val page = PDPage(PDRectangle(widthPoints, heightPoints))
|
||||||
document.addPage(page)
|
document.addPage(page)
|
||||||
|
|||||||
@@ -25,9 +25,10 @@ import org.fairscan.app.domain.Jpeg
|
|||||||
import org.fairscan.app.domain.PageMetadata
|
import org.fairscan.app.domain.PageMetadata
|
||||||
import org.fairscan.app.domain.Rotation
|
import org.fairscan.app.domain.Rotation
|
||||||
import org.fairscan.app.ui.screens.settings.DefaultColorMode
|
import org.fairscan.app.ui.screens.settings.DefaultColorMode
|
||||||
import org.fairscan.imageprocessing.CameraIntrinsics
|
|
||||||
import org.fairscan.imageprocessing.ColorMode
|
import org.fairscan.imageprocessing.ColorMode
|
||||||
|
import org.fairscan.imageprocessing.ImageSize
|
||||||
import org.fairscan.imageprocessing.Mask
|
import org.fairscan.imageprocessing.Mask
|
||||||
|
import org.fairscan.imageprocessing.OpticalMeasures
|
||||||
import org.fairscan.imageprocessing.Point
|
import org.fairscan.imageprocessing.Point
|
||||||
import org.fairscan.imageprocessing.Quad
|
import org.fairscan.imageprocessing.Quad
|
||||||
import org.fairscan.imageprocessing.autoColorMode
|
import org.fairscan.imageprocessing.autoColorMode
|
||||||
@@ -102,7 +103,7 @@ fun processedImage(
|
|||||||
sourceMat = source.toMat()
|
sourceMat = source.toMat()
|
||||||
val quad = metadata.normalizedQuad.scaledTo(1, 1, sourceMat.width(), sourceMat.height())
|
val quad = metadata.normalizedQuad.scaledTo(1, 1, sourceMat.width(), sourceMat.height())
|
||||||
page = extractDocument(sourceMat, quad, rotationDegrees, colorMode, exportQuality.maxPixels,
|
page = extractDocument(sourceMat, quad, rotationDegrees, colorMode, exportQuality.maxPixels,
|
||||||
metadata.cameraIntrinsics)
|
metadata.opticalMeasures)
|
||||||
return Jpeg.fromMat(page, exportQuality.jpegQuality)
|
return Jpeg.fromMat(page, exportQuality.jpegQuality)
|
||||||
} finally {
|
} finally {
|
||||||
sourceMat?.release()
|
sourceMat?.release()
|
||||||
@@ -117,7 +118,7 @@ fun extractDocumentFromBitmap(
|
|||||||
mask: Mask?,
|
mask: Mask?,
|
||||||
viewModelScope: CoroutineScope,
|
viewModelScope: CoroutineScope,
|
||||||
defaultColorMode: DefaultColorMode = DefaultColorMode.AUTO,
|
defaultColorMode: DefaultColorMode = DefaultColorMode.AUTO,
|
||||||
cameraIntrinsics: CameraIntrinsics?,
|
opticalMeasures: OpticalMeasures?,
|
||||||
): CapturedPage {
|
): CapturedPage {
|
||||||
val exportQuality = ExportQuality.BALANCED
|
val exportQuality = ExportQuality.BALANCED
|
||||||
var colorMode = ColorMode.COLOR
|
var colorMode = ColorMode.COLOR
|
||||||
@@ -144,7 +145,7 @@ fun extractDocumentFromBitmap(
|
|||||||
autoColorMode = autoColorMode(bgr, mask, quad)
|
autoColorMode = autoColorMode(bgr, mask, quad)
|
||||||
colorMode = defaultColorMode.colorMode ?: autoColorMode
|
colorMode = defaultColorMode.colorMode ?: autoColorMode
|
||||||
page = extractDocument(bgr, quad, rotationDegrees, colorMode, exportQuality.maxPixels,
|
page = extractDocument(bgr, quad, rotationDegrees, colorMode, exportQuality.maxPixels,
|
||||||
cameraIntrinsics)
|
opticalMeasures)
|
||||||
}
|
}
|
||||||
|
|
||||||
val pageJpeg = Jpeg.fromMat(page, exportQuality.jpegQuality)
|
val pageJpeg = Jpeg.fromMat(page, exportQuality.jpegQuality)
|
||||||
@@ -152,7 +153,9 @@ fun extractDocumentFromBitmap(
|
|||||||
page.release()
|
page.release()
|
||||||
|
|
||||||
val baseRotation = Rotation.fromDegrees(rotationDegrees)
|
val baseRotation = Rotation.fromDegrees(rotationDegrees)
|
||||||
val metadata = PageMetadata(normalizedQuad, baseRotation, autoColorMode, cameraIntrinsics)
|
val sourceSize = ImageSize(source.width, source.height)
|
||||||
|
val metadata =
|
||||||
|
PageMetadata(normalizedQuad, baseRotation, autoColorMode, sourceSize, opticalMeasures)
|
||||||
val sourceJpegDeferred = viewModelScope.async(Dispatchers.IO) {
|
val sourceJpegDeferred = viewModelScope.async(Dispatchers.IO) {
|
||||||
compressSource(source)
|
compressSource(source)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -15,12 +15,18 @@
|
|||||||
package org.fairscan.app.ui.screens.camera
|
package org.fairscan.app.ui.screens.camera
|
||||||
|
|
||||||
import android.graphics.Bitmap
|
import android.graphics.Bitmap
|
||||||
|
import android.hardware.camera2.CameraCaptureSession
|
||||||
|
import android.hardware.camera2.CameraMetadata
|
||||||
|
import android.hardware.camera2.CaptureRequest
|
||||||
|
import android.hardware.camera2.CaptureResult
|
||||||
|
import android.hardware.camera2.TotalCaptureResult
|
||||||
import android.util.Log
|
import android.util.Log
|
||||||
import android.util.Size
|
import android.util.Size
|
||||||
import android.view.ViewGroup.LayoutParams.MATCH_PARENT
|
import android.view.ViewGroup.LayoutParams.MATCH_PARENT
|
||||||
import android.widget.LinearLayout
|
import android.widget.LinearLayout
|
||||||
import androidx.annotation.OptIn
|
import androidx.annotation.OptIn
|
||||||
import androidx.camera.camera2.interop.Camera2CameraInfo
|
import androidx.camera.camera2.interop.Camera2CameraInfo
|
||||||
|
import androidx.camera.camera2.interop.Camera2Interop
|
||||||
import androidx.camera.camera2.interop.ExperimentalCamera2Interop
|
import androidx.camera.camera2.interop.ExperimentalCamera2Interop
|
||||||
import androidx.camera.core.CameraControl
|
import androidx.camera.core.CameraControl
|
||||||
import androidx.camera.core.CameraSelector
|
import androidx.camera.core.CameraSelector
|
||||||
@@ -69,6 +75,7 @@ import androidx.lifecycle.LifecycleOwner
|
|||||||
import androidx.lifecycle.compose.LocalLifecycleOwner
|
import androidx.lifecycle.compose.LocalLifecycleOwner
|
||||||
import org.fairscan.app.ui.components.CameraPermissionState
|
import org.fairscan.app.ui.components.CameraPermissionState
|
||||||
import org.fairscan.imageprocessing.CameraIntrinsics
|
import org.fairscan.imageprocessing.CameraIntrinsics
|
||||||
|
import org.fairscan.imageprocessing.OpticalMeasures
|
||||||
import org.fairscan.imageprocessing.Point
|
import org.fairscan.imageprocessing.Point
|
||||||
import org.fairscan.imageprocessing.Quad
|
import org.fairscan.imageprocessing.Quad
|
||||||
import org.fairscan.imageprocessing.cameraIntrinsics
|
import org.fairscan.imageprocessing.cameraIntrinsics
|
||||||
@@ -193,7 +200,7 @@ fun bindCameraUseCases(
|
|||||||
.setOutputImageFormat(ImageAnalysis.OUTPUT_IMAGE_FORMAT_RGBA_8888).build()
|
.setOutputImageFormat(ImageAnalysis.OUTPUT_IMAGE_FORMAT_RGBA_8888).build()
|
||||||
imageAnalysis.setAnalyzer(executor, onImageAnalyzed)
|
imageAnalysis.setAnalyzer(executor, onImageAnalyzed)
|
||||||
|
|
||||||
val imageCapture = ImageCapture.Builder()
|
val imageCaptureBuilder = ImageCapture.Builder()
|
||||||
.setResolutionSelector(
|
.setResolutionSelector(
|
||||||
ResolutionSelector.Builder()
|
ResolutionSelector.Builder()
|
||||||
.setResolutionStrategy(
|
.setResolutionStrategy(
|
||||||
@@ -208,7 +215,21 @@ fun bindCameraUseCases(
|
|||||||
.build()
|
.build()
|
||||||
)
|
)
|
||||||
.setCaptureMode(ImageCapture.CAPTURE_MODE_MINIMIZE_LATENCY)
|
.setCaptureMode(ImageCapture.CAPTURE_MODE_MINIMIZE_LATENCY)
|
||||||
.build()
|
|
||||||
|
Camera2Interop.Extender(imageCaptureBuilder)
|
||||||
|
.setSessionCaptureCallback(object : CameraCaptureSession.CaptureCallback() {
|
||||||
|
/**
 * Records the lens focus distance reported by a completed capture.
 *
 * Reads LENS_FOCUS_DISTANCE from [result] and, when present, stores it in
 * captureController.lastFocusDistanceDiopters. When the key is absent the
 * previously stored value is left untouched.
 */
override fun onCaptureCompleted(
|
||||||
|
session: CameraCaptureSession,
|
||||||
|
request: CaptureRequest,
|
||||||
|
result: TotalCaptureResult
|
||||||
|
) {
|
||||||
|
// NOTE(review): the target property name implies the value is in diopters;
// whether that holds is presumably device-dependent - confirm.
result.get(CaptureResult.LENS_FOCUS_DISTANCE)?.let {
|
||||||
|
captureController.lastFocusDistanceDiopters = it
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
val imageCapture = imageCaptureBuilder.build()
|
||||||
captureController.imageCapture = imageCapture
|
captureController.imageCapture = imageCapture
|
||||||
|
|
||||||
val camera = cameraProvider.bindToLifecycle(lifecycleOwner, cameraSelector,
|
val camera = cameraProvider.bindToLifecycle(lifecycleOwner, cameraSelector,
|
||||||
@@ -296,21 +317,34 @@ class CameraCaptureController {
|
|||||||
private val executor = Executors.newSingleThreadExecutor()
|
private val executor = Executors.newSingleThreadExecutor()
|
||||||
var previewView: PreviewView? = null
|
var previewView: PreviewView? = null
|
||||||
var cameraIntrinsics: CameraIntrinsics? = null
|
var cameraIntrinsics: CameraIntrinsics? = null
|
||||||
|
var canUseFocusDistance = false
|
||||||
|
|
||||||
|
@Volatile
|
||||||
|
var lastFocusDistanceDiopters: Float? = null
|
||||||
|
|
||||||
fun shutdown() {
|
fun shutdown() {
|
||||||
executor.shutdown()
|
executor.shutdown()
|
||||||
}
|
}
|
||||||
|
|
||||||
fun takePicture(onImageCaptured: (ImageProxy?, CameraIntrinsics?) -> Unit) {
|
fun takePicture(onImageCaptured: (ImageProxy?, OpticalMeasures?) -> Unit) {
|
||||||
imageCapture?.takePicture(
|
imageCapture?.takePicture(
|
||||||
executor,
|
executor,
|
||||||
object : ImageCapture.OnImageCapturedCallback() {
|
object : ImageCapture.OnImageCapturedCallback() {
|
||||||
override fun onCaptureSuccess(imageProxy: ImageProxy) {
|
override fun onCaptureSuccess(imageProxy: ImageProxy) {
|
||||||
onImageCaptured(imageProxy, cameraIntrinsics)
|
val diopters = lastFocusDistanceDiopters
|
||||||
|
val subjectDistanceInMm =
|
||||||
|
if (canUseFocusDistance && diopters != null && diopters != 0.0f) {
|
||||||
|
1000 / diopters
|
||||||
|
} else {
|
||||||
|
null
|
||||||
|
}
|
||||||
|
onImageCaptured(
|
||||||
|
imageProxy,
|
||||||
|
cameraIntrinsics?.let { OpticalMeasures(it, subjectDistanceInMm) })
|
||||||
}
|
}
|
||||||
override fun onError(exception: ImageCaptureException) {
|
override fun onError(exception: ImageCaptureException) {
|
||||||
Log.e("CameraCapture", "Image capture failed: ${exception.message}", exception)
|
Log.e("CameraCapture", "Image capture failed: ${exception.message}", exception)
|
||||||
onImageCaptured(null, cameraIntrinsics)
|
onImageCaptured(null, null)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
@@ -344,6 +378,12 @@ class CameraCaptureController {
|
|||||||
} else {
|
} else {
|
||||||
cameraIntrinsics(focalLengths[0], max(sensorSize.width, sensorSize.height))
|
cameraIntrinsics(focalLengths[0], max(sensorSize.width, sensorSize.height))
|
||||||
}
|
}
|
||||||
|
val calibration = cameraInfo.getCameraCharacteristic(
|
||||||
|
android.hardware.camera2.CameraCharacteristics.LENS_INFO_FOCUS_DISTANCE_CALIBRATION
|
||||||
|
)
|
||||||
|
canUseFocusDistance =
|
||||||
|
calibration == CameraMetadata.LENS_INFO_FOCUS_DISTANCE_CALIBRATION_CALIBRATED
|
||||||
|
|| calibration == CameraMetadata.LENS_INFO_FOCUS_DISTANCE_CALIBRATION_APPROXIMATE
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -229,8 +229,8 @@ fun CameraScreen(
|
|||||||
Log.i("FairScan", "Pressed <Capture>")
|
Log.i("FairScan", "Pressed <Capture>")
|
||||||
cameraViewModel.onCapturePressed(it)
|
cameraViewModel.onCapturePressed(it)
|
||||||
captureController.takePicture(
|
captureController.takePicture(
|
||||||
onImageCaptured = { imageProxy, cameraCharacteristics ->
|
onImageCaptured = { imageProxy, opticalMeasures ->
|
||||||
cameraViewModel.onImageCaptured(imageProxy, cameraCharacteristics) }
|
cameraViewModel.onImageCaptured(imageProxy, opticalMeasures) }
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@@ -648,7 +648,7 @@ fun CameraScreenPreviewWithProcessedImage() {
|
|||||||
CapturedPage(
|
CapturedPage(
|
||||||
debugImage("gallica.bnf.fr-bpt6k5530456s-1.jpg"),
|
debugImage("gallica.bnf.fr-bpt6k5530456s-1.jpg"),
|
||||||
CompletableDeferred(Jpeg(ByteArray(0))),
|
CompletableDeferred(Jpeg(ByteArray(0))),
|
||||||
PageMetadata(quad, R0, ColorMode.COLOR, null),
|
PageMetadata(quad, R0, ColorMode.COLOR, null, null),
|
||||||
ColorMode.COLOR)))
|
ColorMode.COLOR)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -36,6 +36,7 @@ import org.fairscan.app.domain.CapturedPage
|
|||||||
import org.fairscan.app.platform.extractDocumentFromBitmap
|
import org.fairscan.app.platform.extractDocumentFromBitmap
|
||||||
import org.fairscan.imageprocessing.CameraIntrinsics
|
import org.fairscan.imageprocessing.CameraIntrinsics
|
||||||
import org.fairscan.imageprocessing.ImageSize
|
import org.fairscan.imageprocessing.ImageSize
|
||||||
|
import org.fairscan.imageprocessing.OpticalMeasures
|
||||||
import org.fairscan.imageprocessing.detectDocumentQuad
|
import org.fairscan.imageprocessing.detectDocumentQuad
|
||||||
import java.util.concurrent.CancellationException
|
import java.util.concurrent.CancellationException
|
||||||
|
|
||||||
@@ -134,13 +135,13 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fun onImageCaptured(imageProxy: ImageProxy?, cameraIntrinsics: CameraIntrinsics?) {
|
fun onImageCaptured(imageProxy: ImageProxy?, opticalMeasures: OpticalMeasures?) {
|
||||||
if (imageProxy != null) {
|
if (imageProxy != null) {
|
||||||
viewModelScope.launch {
|
viewModelScope.launch {
|
||||||
try {
|
try {
|
||||||
val source = imageProxy.toBitmap()
|
val source = imageProxy.toBitmap()
|
||||||
val rotationDegrees = imageProxy.imageInfo.rotationDegrees
|
val rotationDegrees = imageProxy.imageInfo.rotationDegrees
|
||||||
val page = processCapturedImage(source, rotationDegrees, cameraIntrinsics)
|
val page = processCapturedImage(source, rotationDegrees, opticalMeasures)
|
||||||
imageProxy.close()
|
imageProxy.close()
|
||||||
onCaptureProcessed(page)
|
onCaptureProcessed(page)
|
||||||
} catch (e: RuntimeException) {
|
} catch (e: RuntimeException) {
|
||||||
@@ -156,7 +157,7 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
|
|||||||
private suspend fun processCapturedImage(
|
private suspend fun processCapturedImage(
|
||||||
source: Bitmap,
|
source: Bitmap,
|
||||||
rotationDegrees: Int,
|
rotationDegrees: Int,
|
||||||
cameraIntrinsics: CameraIntrinsics?,
|
opticalMeasures: OpticalMeasures?,
|
||||||
): CapturedPage = withContext(Dispatchers.IO) {
|
): CapturedPage = withContext(Dispatchers.IO) {
|
||||||
val segmentation = imageSegmentationService.runSegmentationAndReturn(source)
|
val segmentation = imageSegmentationService.runSegmentationAndReturn(source)
|
||||||
val mask = segmentation?.segmentation
|
val mask = segmentation?.segmentation
|
||||||
@@ -164,7 +165,7 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
|
|||||||
val quad = mask?.let { detectDocumentQuad(mask, originalSize, isLiveAnalysis = false) }
|
val quad = mask?.let { detectDocumentQuad(mask, originalSize, isLiveAnalysis = false) }
|
||||||
val defaultColorMode = settingsRepository.defaultColorMode.first()
|
val defaultColorMode = settingsRepository.defaultColorMode.first()
|
||||||
val result = extractDocumentFromBitmap(
|
val result = extractDocumentFromBitmap(
|
||||||
source, quad, rotationDegrees, mask, viewModelScope, defaultColorMode, cameraIntrinsics)
|
source, quad, rotationDegrees, mask, viewModelScope, defaultColorMode, opticalMeasures)
|
||||||
return@withContext result
|
return@withContext result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -46,7 +46,7 @@ import org.fairscan.app.data.FileManager
|
|||||||
import org.fairscan.app.data.ImageRepository
|
import org.fairscan.app.data.ImageRepository
|
||||||
import org.fairscan.app.domain.ExportQuality
|
import org.fairscan.app.domain.ExportQuality
|
||||||
import org.fairscan.app.domain.PageViewKey
|
import org.fairscan.app.domain.PageViewKey
|
||||||
import org.fairscan.app.domain.jpegsForExport
|
import org.fairscan.app.domain.pagesToExport
|
||||||
import org.fairscan.app.ui.screens.settings.ExportFormat
|
import org.fairscan.app.ui.screens.settings.ExportFormat
|
||||||
import java.io.File
|
import java.io.File
|
||||||
import java.io.FileInputStream
|
import java.io.FileInputStream
|
||||||
@@ -76,8 +76,8 @@ class ExportViewModel(container: AppContainer, val imageRepository: ImageReposit
|
|||||||
private suspend fun generatePdf(
|
private suspend fun generatePdf(
|
||||||
exportQuality: ExportQuality
|
exportQuality: ExportQuality
|
||||||
): ExportResult.Pdf = withContext(Dispatchers.IO) {
|
): ExportResult.Pdf = withContext(Dispatchers.IO) {
|
||||||
val jpegs = jpegsForExport(imageRepository, exportQuality)
|
val pageToExports = pagesToExport(imageRepository, exportQuality)
|
||||||
val pdf = fileManager.generatePdf(jpegs)
|
val pdf = fileManager.generatePdf(pageToExports)
|
||||||
return@withContext ExportResult.Pdf(pdf.file, pdf.sizeInBytes, pdf.pageCount)
|
return@withContext ExportResult.Pdf(pdf.file, pdf.sizeInBytes, pdf.pageCount)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -181,12 +181,12 @@ class ExportViewModel(container: AppContainer, val imageRepository: ImageReposit
|
|||||||
private suspend fun generateJpegs(
|
private suspend fun generateJpegs(
|
||||||
exportQuality: ExportQuality
|
exportQuality: ExportQuality
|
||||||
): ExportResult.Jpeg = withContext(Dispatchers.IO) {
|
): ExportResult.Jpeg = withContext(Dispatchers.IO) {
|
||||||
val jpegs = jpegsForExport(imageRepository, exportQuality)
|
val pageToExports = pagesToExport(imageRepository, exportQuality)
|
||||||
val timestamp = System.currentTimeMillis()
|
val timestamp = System.currentTimeMillis()
|
||||||
preparationDir.mkdirs()
|
preparationDir.mkdirs()
|
||||||
val files = jpegs.mapIndexed { index, jpeg ->
|
val files = pageToExports.mapIndexed { index, page ->
|
||||||
val file = File(preparationDir, "$timestamp-${index + 1}.jpg")
|
val file = File(preparationDir, "$timestamp-${index + 1}.jpg")
|
||||||
file.writeBytes(jpeg.get().bytes)
|
file.writeBytes(page.jpeg.get().bytes)
|
||||||
file
|
file
|
||||||
}.toList()
|
}.toList()
|
||||||
val sizeInBytes = files.sumOf { it.length() }
|
val sizeInBytes = files.sumOf { it.length() }
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ package org.fairscan.app.data
|
|||||||
import kotlinx.coroutines.test.runTest
|
import kotlinx.coroutines.test.runTest
|
||||||
import org.assertj.core.api.Assertions.assertThat
|
import org.assertj.core.api.Assertions.assertThat
|
||||||
import org.fairscan.app.domain.Jpeg
|
import org.fairscan.app.domain.Jpeg
|
||||||
import org.fairscan.app.domain.JpegProvider
|
import org.fairscan.app.domain.PageToExport
|
||||||
import org.junit.Test
|
import org.junit.Test
|
||||||
import java.io.File
|
import java.io.File
|
||||||
import java.io.OutputStream
|
import java.io.OutputStream
|
||||||
@@ -73,15 +73,16 @@ class FileManagerTest {
|
|||||||
@Test
|
@Test
|
||||||
fun generatePdf() = runTest {
|
fun generatePdf() = runTest {
|
||||||
val fakePdfWriter = object : PdfWriter {
|
val fakePdfWriter = object : PdfWriter {
|
||||||
override suspend fun writePdfFromJpegs(jpegs: List<JpegProvider>, outputStream: OutputStream): Int {
|
override suspend fun writePdfFromJpegs(pages: List<PageToExport>, outputStream: OutputStream): Int {
|
||||||
val list = jpegs.toList()
|
val list = pages.toList()
|
||||||
list.forEach { bytes -> outputStream.write(bytes.get().bytes) }
|
list.forEach { page -> outputStream.write(page.jpeg.get().bytes) }
|
||||||
return list.size
|
return list.size
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
val manager = FileManager(pdfDir, externalDir, fakePdfWriter)
|
val manager = FileManager(pdfDir, externalDir, fakePdfWriter)
|
||||||
val jpegs = listOf(byteArrayOf(0x01, 0x02), byteArrayOf(0x11)).map { JpegProvider { Jpeg(it) } }
|
val pages = listOf(byteArrayOf(0x01, 0x02), byteArrayOf(0x11))
|
||||||
val pdf = manager.generatePdf(jpegs)
|
.map { PageToExport(null) { Jpeg(it) } }
|
||||||
|
val pdf = manager.generatePdf(pages)
|
||||||
assertThat(pdf.pageCount).isEqualTo(2)
|
assertThat(pdf.pageCount).isEqualTo(2)
|
||||||
assertThat(pdf.sizeInBytes).isEqualTo(3)
|
assertThat(pdf.sizeInBytes).isEqualTo(3)
|
||||||
assertThat(pdf.file.readBytes()).isEqualTo(byteArrayOf(0x01, 0x02, 0x11))
|
assertThat(pdf.file.readBytes()).isEqualTo(byteArrayOf(0x01, 0x02, 0x11))
|
||||||
|
|||||||
@@ -26,7 +26,6 @@ import org.assertj.core.api.Assertions.assertThat
|
|||||||
import org.fairscan.app.domain.Jpeg
|
import org.fairscan.app.domain.Jpeg
|
||||||
import org.fairscan.app.domain.PageMetadata
|
import org.fairscan.app.domain.PageMetadata
|
||||||
import org.fairscan.app.domain.PageViewKey
|
import org.fairscan.app.domain.PageViewKey
|
||||||
import org.fairscan.app.domain.Rotation
|
|
||||||
import org.fairscan.app.domain.Rotation.R0
|
import org.fairscan.app.domain.Rotation.R0
|
||||||
import org.fairscan.app.domain.Rotation.R180
|
import org.fairscan.app.domain.Rotation.R180
|
||||||
import org.fairscan.app.domain.Rotation.R270
|
import org.fairscan.app.domain.Rotation.R270
|
||||||
@@ -35,6 +34,8 @@ import org.fairscan.imageprocessing.CameraIntrinsics
|
|||||||
import org.fairscan.imageprocessing.ColorMode
|
import org.fairscan.imageprocessing.ColorMode
|
||||||
import org.fairscan.imageprocessing.ColorMode.COLOR
|
import org.fairscan.imageprocessing.ColorMode.COLOR
|
||||||
import org.fairscan.imageprocessing.ColorMode.GRAYSCALE
|
import org.fairscan.imageprocessing.ColorMode.GRAYSCALE
|
||||||
|
import org.fairscan.imageprocessing.ImageSize
|
||||||
|
import org.fairscan.imageprocessing.OpticalMeasures
|
||||||
import org.fairscan.imageprocessing.Point
|
import org.fairscan.imageprocessing.Point
|
||||||
import org.fairscan.imageprocessing.Quad
|
import org.fairscan.imageprocessing.Quad
|
||||||
import org.junit.Rule
|
import org.junit.Rule
|
||||||
@@ -52,8 +53,9 @@ class ImageRepositoryTest {
|
|||||||
private val testScope = TestScope()
|
private val testScope = TestScope()
|
||||||
|
|
||||||
val quad1 = Quad(Point(.01, .02), Point(.1, .03), Point(.11, .12), Point(.03, .09))
|
val quad1 = Quad(Point(.01, .02), Point(.1, .03), Point(.11, .12), Point(.03, .09))
|
||||||
val intrinsics = CameraIntrinsics(42.0f, 43.0f)
|
val opticalMeasures = OpticalMeasures(CameraIntrinsics(42.0f, 43.0f), 44.0f)
|
||||||
val metadata1 = PageMetadata(quad1, R90, COLOR, intrinsics)
|
val sourceSize = ImageSize(1600, 1200)
|
||||||
|
val metadata1 = PageMetadata(quad1, R90, COLOR, sourceSize, opticalMeasures)
|
||||||
|
|
||||||
fun getFilesDir(): File {
|
fun getFilesDir(): File {
|
||||||
if (_filesDir == null) {
|
if (_filesDir == null) {
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ espressoCore = "3.7.0"
|
|||||||
lifecycleRuntimeKtx = "2.10.0"
|
lifecycleRuntimeKtx = "2.10.0"
|
||||||
activityCompose = "1.13.0"
|
activityCompose = "1.13.0"
|
||||||
composeBom = "2026.03.00"
|
composeBom = "2026.03.00"
|
||||||
camerax = "1.5.3"
|
camerax = "1.6.1"
|
||||||
datastore = "1.2.1"
|
datastore = "1.2.1"
|
||||||
documentfile = "1.1.0"
|
documentfile = "1.1.0"
|
||||||
litert = "1.4.1"
|
litert = "1.4.1"
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ import org.opencv.core.MatOfPoint2f
|
|||||||
import org.opencv.core.Size
|
import org.opencv.core.Size
|
||||||
import org.opencv.imgproc.Imgproc
|
import org.opencv.imgproc.Imgproc
|
||||||
import kotlin.math.abs
|
import kotlin.math.abs
|
||||||
|
import kotlin.math.sqrt
|
||||||
|
|
||||||
interface Mask {
|
interface Mask {
|
||||||
val width: Int
|
val width: Int
|
||||||
@@ -156,14 +157,15 @@ fun extractDocument(
|
|||||||
rotationDegrees: Int,
|
rotationDegrees: Int,
|
||||||
colorMode: ColorMode,
|
colorMode: ColorMode,
|
||||||
maxPixels: Long,
|
maxPixels: Long,
|
||||||
cameraIntrinsics: CameraIntrinsics? = null,
|
opticalMeasures: OpticalMeasures? = null,
|
||||||
): Mat {
|
): Mat {
|
||||||
val (targetWidth, targetHeight) = estimateRealDimensions(
|
val estimatedDimensions = estimateRealDimensions(
|
||||||
quad,
|
quad,
|
||||||
inputMat.cols(),
|
inputMat.cols(),
|
||||||
inputMat.rows(),
|
inputMat.rows(),
|
||||||
cameraIntrinsics
|
opticalMeasures,
|
||||||
)
|
)
|
||||||
|
val (targetWidth, targetHeight) = estimatedDimensions.toPixelDimensions(quad)
|
||||||
val srcPoints = MatOfPoint2f(
|
val srcPoints = MatOfPoint2f(
|
||||||
quad.topLeft.toCv(),
|
quad.topLeft.toCv(),
|
||||||
quad.topRight.toCv(),
|
quad.topRight.toCv(),
|
||||||
@@ -193,6 +195,17 @@ fun extractDocument(
|
|||||||
return rotated
|
return rotated
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Converts this estimate into output dimensions expressed in the same coordinate units as [quad].
 *
 * The returned pair is (width, height). Its product equals the product of the averaged opposite
 * side lengths of [quad], and its height / width quotient equals [aspectRatio]; only the ratio of
 * this estimate is used here, not its absolute size.
 *
 * NOTE(review): there is no guard on [aspectRatio]. If it is zero, NaN or infinite (an estimate
 * with a zero width or height), the division and square root below yield zero or non-finite
 * values. Confirm that callers never pass a degenerate quad.
 *
 * @param quad the quadrilateral whose side lengths define the area to preserve.
 * @return the target (width, height) pair.
 */
fun EstimatedDimensions.toPixelDimensions(quad: Quad): Pair<Double, Double> {
|
||||||
|
// Average of the top and bottom sides (norm is presumably the Euclidean distance — confirm).
val w = (norm(quad.topLeft, quad.topRight) + norm(quad.bottomLeft, quad.bottomRight)) / 2
|
||||||
|
// Average of the left and right sides.
val h = (norm(quad.topLeft, quad.bottomLeft) + norm(quad.topRight, quad.bottomRight)) / 2
|
||||||
|
// Area of the quad approximated as averaged width times averaged height.
val projectedArea = w * h
|
||||||
|
|
||||||
|
// height / width of the estimated document
val ratio = aspectRatio
|
||||||
|
// Solve width * height = projectedArea with height = width * ratio.
val targetWidth = sqrt(projectedArea / ratio)
|
||||||
|
val targetHeight = targetWidth * ratio
|
||||||
|
return Pair(targetWidth, targetHeight)
|
||||||
|
}
|
||||||
|
|
||||||
fun rotate(input: Mat, degrees: Int): Mat {
|
fun rotate(input: Mat, degrees: Int): Mat {
|
||||||
val output = Mat()
|
val output = Mat()
|
||||||
when ((degrees % 360 + 360) % 360) {
|
when ((degrees % 360 + 360) % 360) {
|
||||||
|
|||||||
@@ -47,6 +47,24 @@ fun cameraIntrinsics(focalLengthInMm: Float?, sensorWidthInMm: Float?): CameraIn
|
|||||||
return CameraIntrinsics(focalLengthInMm, sensorWidthInMm)
|
return CameraIntrinsics(focalLengthInMm, sensorWidthInMm)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Optical data attached to a captured image: the camera intrinsics and, when known, the distance
 * to the subject.
 *
 * @property cameraIntrinsics intrinsics of the camera that took the picture.
 * @property subjectDistance distance to the subject in millimeters, or null when unavailable
 * (presumably derived from the lens focus distance — confirm against the capture code).
 */
data class OpticalMeasures(
|
||||||
|
val cameraIntrinsics: CameraIntrinsics,
|
||||||
|
// in millimeters; null when the distance is not available
|
||||||
|
val subjectDistance: Float?,
|
||||||
|
)
|
||||||
|
|
||||||
|
/**
 * Estimated size of a document, either as physical dimensions or as a ratio-only pair.
 *
 * Both variants expose [aspectRatio]; only [Physical] carries an absolute scale.
 */
sealed class EstimatedDimensions {
|
||||||
|
// Dimensions in mm, when subject distance is available
|
||||||
|
data class Physical(val widthMm: Double, val heightMm: Double) : EstimatedDimensions()
|
||||||
|
// Dimensions in arbitrary units, only ratio is meaningful
|
||||||
|
data class Ratio(val width: Double, val height: Double) : EstimatedDimensions()
|
||||||
|
|
||||||
|
// Height divided by width, so values above 1 mean taller than wide.
// Plain Double division: a zero width yields Infinity or NaN instead of throwing.
val aspectRatio: Double get() = when (this) {
|
||||||
|
is Physical -> heightMm / widthMm
|
||||||
|
is Ratio -> height / width
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Estimates the true width and height of the document in the output image,
|
* Estimates the true width and height of the document in the output image,
|
||||||
* correcting for perspective distortion using projective geometry.
|
* correcting for perspective distortion using projective geometry.
|
||||||
@@ -63,13 +81,13 @@ fun estimateRealDimensions(
|
|||||||
quad: Quad,
|
quad: Quad,
|
||||||
imageWidth: Int,
|
imageWidth: Int,
|
||||||
imageHeight: Int,
|
imageHeight: Int,
|
||||||
cameraIntrinsics: CameraIntrinsics?
|
opticalMeasures: OpticalMeasures?,
|
||||||
): Pair<Double, Double> {
|
): EstimatedDimensions {
|
||||||
|
|
||||||
fun averageSides(): Pair<Double, Double> {
|
fun averageSides(): EstimatedDimensions.Ratio {
|
||||||
val w = (norm(quad.topLeft, quad.topRight) + norm(quad.bottomLeft, quad.bottomRight)) / 2
|
val w = (norm(quad.topLeft, quad.topRight) + norm(quad.bottomLeft, quad.bottomRight)) / 2
|
||||||
val h = (norm(quad.topLeft, quad.bottomLeft) + norm(quad.topRight, quad.bottomRight)) / 2
|
val h = (norm(quad.topLeft, quad.bottomLeft) + norm(quad.topRight, quad.bottomRight)) / 2
|
||||||
return Pair(w, h)
|
return EstimatedDimensions.Ratio(w, h)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Homogeneous 2D point
|
// Homogeneous 2D point
|
||||||
@@ -97,8 +115,9 @@ fun estimateRealDimensions(
|
|||||||
val v1 = Point(v1h.x / v1h.z - cx, v1h.y / v1h.z - cy)
|
val v1 = Point(v1h.x / v1h.z - cx, v1h.y / v1h.z - cy)
|
||||||
val v2 = Point(v2h.x / v2h.z - cx, v2h.y / v2h.z - cy)
|
val v2 = Point(v2h.x / v2h.z - cx, v2h.y / v2h.z - cy)
|
||||||
|
|
||||||
val f = if (cameraIntrinsics != null) {
|
val f = if (opticalMeasures != null) {
|
||||||
cameraIntrinsics.focalLengthInPixels(max(imageWidth, imageHeight)).toDouble()
|
opticalMeasures.cameraIntrinsics
|
||||||
|
.focalLengthInPixels(max(imageWidth, imageHeight)).toDouble()
|
||||||
} else {
|
} else {
|
||||||
// Focal length estimated assuming zero skew and principal point at image center.
|
// Focal length estimated assuming zero skew and principal point at image center.
|
||||||
// Under these assumptions, the Image of the Absolute Conic (IAC) simplifies,
|
// Under these assumptions, the Image of the Absolute Conic (IAC) simplifies,
|
||||||
@@ -131,26 +150,37 @@ fun estimateRealDimensions(
|
|||||||
// Camera ray through a corner: K⁻¹ · (u, v, 1)
|
// Camera ray through a corner: K⁻¹ · (u, v, 1)
|
||||||
fun ray(p: Point) = Vector3D((p.x - cx) / f, (p.y - cy) / f, 1.0)
|
fun ray(p: Point) = Vector3D((p.x - cx) / f, (p.y - cy) / f, 1.0)
|
||||||
|
|
||||||
// Intersect ray with document plane: X = t·r where t = 1 / (n·r)
|
// Scale factor: either from subject distance, or arbitrary (ratio only)
|
||||||
// We assume an arbitrary plane distance (d = 1). Absolute scale is wrong,
|
val subjectDistance = opticalMeasures?.subjectDistance?.toDouble()
|
||||||
// but cancels out when computing length ratios.
|
val scale: Double? = if (subjectDistance != null) {
|
||||||
|
// Project subject distance onto the plane normal to get perpendicular distance
|
||||||
|
val centerX = (quad.topLeft.x + quad.topRight.x + quad.bottomLeft.x + quad.bottomRight.x) / 4.0
|
||||||
|
val centerY = (quad.topLeft.y + quad.topRight.y + quad.bottomLeft.y + quad.bottomRight.y) / 4.0
|
||||||
|
val centerRay = ray(Point(centerX, centerY)).let { it * (1.0 / it.norm()) }
|
||||||
|
val cosAngle = centerRay.dotProduct(n).absoluteValue
|
||||||
|
if (cosAngle < 0.1) null // document too tilted, unreliable
|
||||||
|
else subjectDistance * cosAngle
|
||||||
|
} else null
|
||||||
|
|
||||||
|
// Intersect ray with document plane: X = t·r where t = d / (n·r)
|
||||||
|
// When subjectDistance is unavailable, we assume an arbitrary plane distance (d = 1): absolute
|
||||||
|
// scale is wrong, but cancels out when computing length ratios.
|
||||||
fun corner3D(p: Point): Vector3D {
|
fun corner3D(p: Point): Vector3D {
|
||||||
val r = ray(p)
|
val r = ray(p)
|
||||||
return r * (1.0 / n.dotProduct(r))
|
val t = if (scale != null) scale / n.dotProduct(r) else 1.0 / n.dotProduct(r)
|
||||||
|
return r * t
|
||||||
}
|
}
|
||||||
|
|
||||||
val xTL = corner3D(quad.topLeft); val xTR = corner3D(quad.topRight)
|
val xTL = corner3D(quad.topLeft); val xTR = corner3D(quad.topRight)
|
||||||
val xBR = corner3D(quad.bottomRight); val xBL = corner3D(quad.bottomLeft)
|
val xBR = corner3D(quad.bottomRight); val xBL = corner3D(quad.bottomLeft)
|
||||||
|
|
||||||
// Side lengths in reconstructed 3D space (up to an unknown global scale)
|
// Side lengths in reconstructed 3D space
|
||||||
val realW = ((xTR - xTL).norm() + (xBR - xBL).norm()) / 2
|
val realW = ((xTR - xTL).norm() + (xBR - xBL).norm()) / 2
|
||||||
val realH = ((xBL - xTL).norm() + (xBR - xTR).norm()) / 2
|
val realH = ((xBL - xTL).norm() + (xBR - xTR).norm()) / 2
|
||||||
|
|
||||||
// Output dimensions: preserve projected area, apply corrected aspect ratio
|
return if (opticalMeasures != null && scale != null) {
|
||||||
val ratio = realH / realW
|
EstimatedDimensions.Physical(realW, realH)
|
||||||
val (projW, projH) = averageSides()
|
} else {
|
||||||
val targetWidth = sqrt(projW * projH / ratio)
|
EstimatedDimensions.Ratio(realW, realH)
|
||||||
val targetHeight = targetWidth * ratio
|
}
|
||||||
|
|
||||||
return Pair(targetWidth, targetHeight)
|
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user