New quad algorithm: identify edges from contour orientation (#130)
Goal: improve the precision of automatic document cropping by switching:
- from the Douglas-Peucker algorithm (OpenCV's approxPolyDP) plus a heuristic for documents missing a corner
- to an algorithm that looks for edges

Changes:
* New quad algorithm: identify edges from contour orientation
* Performance optimization: reduce the number of calls to trigonometric functions
* Performance: use a single threshold for live analysis
* Fix the orientation of the debug mask and compute it only if required
* Exclude quads that go out of the frame
This commit is contained in:
committed by
GitHub
parent
cf196576fe
commit
343495dafe
@@ -29,6 +29,7 @@ import kotlinx.coroutines.sync.Mutex
|
||||
import kotlinx.coroutines.sync.withLock
|
||||
import kotlinx.coroutines.withContext
|
||||
import org.fairscan.app.data.Logger
|
||||
import org.fairscan.imageprocessing.ImageSize
|
||||
import org.fairscan.imageprocessing.Mask
|
||||
import org.opencv.core.CvType
|
||||
import org.opencv.core.Mat
|
||||
@@ -39,7 +40,6 @@ import org.tensorflow.lite.support.common.ops.NormalizeOp
|
||||
import org.tensorflow.lite.support.image.ImageProcessor
|
||||
import org.tensorflow.lite.support.image.TensorImage
|
||||
import org.tensorflow.lite.support.image.ops.ResizeOp
|
||||
import org.tensorflow.lite.support.image.ops.Rot90Op
|
||||
import java.nio.ByteBuffer
|
||||
import java.nio.ByteOrder
|
||||
|
||||
@@ -73,13 +73,11 @@ class ImageSegmentationService(private val context: Context, private val logger:
|
||||
private fun runSegmentation(interpreter: Interpreter, bitmap: Bitmap, rotationDegrees: Int): SegmentationResult {
|
||||
val startTime = SystemClock.uptimeMillis()
|
||||
|
||||
val rotation = -rotationDegrees / 90
|
||||
val (_, h, w, _) = interpreter.getOutputTensor(0).shape()
|
||||
val imageProcessor =
|
||||
ImageProcessor
|
||||
.Builder()
|
||||
.add(ResizeOp(h, w, ResizeOp.ResizeMethod.BILINEAR))
|
||||
.add(Rot90Op(rotation))
|
||||
.add(NormalizeOp(127.5f, 127.5f)) // TODO check if it's correct
|
||||
.build()
|
||||
val tensorImage = TensorImage(DataType.FLOAT32)
|
||||
@@ -88,7 +86,11 @@ class ImageSegmentationService(private val context: Context, private val logger:
|
||||
val segmentResult = segment(interpreter, processedImage)
|
||||
|
||||
val inferenceTime = SystemClock.uptimeMillis() - startTime
|
||||
return SegmentationResult(segmentResult, inferenceTime)
|
||||
return SegmentationResult(
|
||||
segmentResult,
|
||||
ImageSize(bitmap.width, bitmap.height),
|
||||
rotationDegrees,
|
||||
inferenceTime)
|
||||
}
|
||||
|
||||
suspend fun runSegmentationAndReturn(bitmap: Bitmap, rotationDegrees: Int): SegmentationResult? {
|
||||
@@ -163,10 +165,14 @@ class ImageSegmentationService(private val context: Context, private val logger:
|
||||
mask.put(0, 0, data)
|
||||
return mask
|
||||
}
|
||||
|
||||
/** Returns an [ImageSize] built from this object's `width` and `height`. */
fun maskSize(): ImageSize = ImageSize(width, height)
|
||||
}
|
||||
|
||||
/**
 * Outcome of one segmentation run, bundled with the context needed to interpret it.
 *
 * As populated by `runSegmentation`:
 *
 * @property segmentation the segmentation computed for the pre-processed input image
 * @property originalSize width and height of the bitmap that was submitted for segmentation
 * @property rotationDegrees rotation, in degrees, that was requested for the run
 * @property inferenceTime elapsed time of the run, measured as a difference of
 *   `SystemClock.uptimeMillis()` readings (milliseconds)
 */
data class SegmentationResult(
    val segmentation: Segmentation,
    val originalSize: ImageSize,
    val rotationDegrees: Int,
    val inferenceTime: Long,
)
|
||||
}
|
||||
|
||||
@@ -211,7 +211,7 @@ fun bindCameraUseCases(
|
||||
|
||||
@Composable
|
||||
fun AnalysisOverlay(liveAnalysisState: LiveAnalysisState, debugMode: Boolean) {
|
||||
val binaryMask = liveAnalysisState.binaryMask ?: return
|
||||
val maskSize = liveAnalysisState.maskSize ?: return
|
||||
val targetQuad = liveAnalysisState.stableQuad
|
||||
var displayedQuad by remember { mutableStateOf<Quad?>(null) }
|
||||
val quadColor = MaterialTheme.colorScheme.primary
|
||||
@@ -233,14 +233,15 @@ fun AnalysisOverlay(liveAnalysisState: LiveAnalysisState, debugMode: Boolean) {
|
||||
|
||||
Canvas(modifier = Modifier.fillMaxSize()) {
|
||||
if (debugMode) {
|
||||
drawMask(this, binaryMask)
|
||||
val binaryMask = liveAnalysisState.binaryMaskProvider.invoke()
|
||||
binaryMask?.let { drawMask(this, it) }
|
||||
}
|
||||
displayedQuad?.let { quad ->
|
||||
val scaledQuad = quad.scaledTo(
|
||||
fromWidth = binaryMask.width,
|
||||
fromHeight = binaryMask.height,
|
||||
toWidth = size.width.toInt(),
|
||||
toHeight = size.height.toInt()
|
||||
fromWidth = maskSize.width,
|
||||
fromHeight = maskSize.height,
|
||||
toWidth = size.width.toDouble(),
|
||||
toHeight = size.height.toDouble()
|
||||
)
|
||||
scaledQuad.edges().forEach {
|
||||
drawLine(quadColor, it.from.toOffset(), it.to.toOffset(), 10.0f)
|
||||
|
||||
@@ -16,12 +16,14 @@ package org.fairscan.app.ui.screens.camera
|
||||
|
||||
import android.graphics.Bitmap
|
||||
import androidx.compose.runtime.Immutable
|
||||
import org.fairscan.imageprocessing.ImageSize
|
||||
import org.fairscan.imageprocessing.Quad
|
||||
|
||||
/**
 * Snapshot of the live camera analysis that the camera view model publishes for the overlay.
 *
 * NOTE(review): this listing is a rendered diff, so both the pre-change field (`binaryMask`) and
 * its replacements (`maskSize`, `binaryMaskProvider`) appear below — confirm which side is current.
 *
 * @property inferenceTime duration of the segmentation run, copied from the segmentation result
 *   (measured there with `SystemClock.uptimeMillis()`, i.e. milliseconds)
 * @property binaryMask binary mask bitmap derived from the segmentation (pre-change side of the diff)
 * @property maskSize size of the segmentation mask; the overlay uses it as the source size when
 *   scaling the quad to the canvas
 * @property binaryMaskProvider builds the binary mask bitmap when invoked; the overlay only invokes
 *   it in debug mode, so the mask is not computed otherwise
 * @property documentQuad quad detected on the latest analysed frame, or null when none was found
 * @property stableQuad output of the quad stabilizer for the latest frame; this is the quad the
 *   overlay animates towards
 */
@Immutable
|
||||
data class LiveAnalysisState(
|
||||
val inferenceTime: Long = 0L,
|
||||
val binaryMask: Bitmap? = null,
|
||||
val maskSize: ImageSize? = null,
|
||||
val binaryMaskProvider: () -> Bitmap? = { -> null },
|
||||
val documentQuad: Quad? = null,
|
||||
val stableQuad: Quad? = null,
|
||||
)
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
package org.fairscan.app.ui.screens.camera
|
||||
|
||||
import android.graphics.Bitmap
|
||||
import android.graphics.Matrix
|
||||
import androidx.camera.core.ImageProxy
|
||||
import androidx.core.graphics.createBitmap
|
||||
import androidx.lifecycle.ViewModel
|
||||
@@ -72,16 +73,29 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
|
||||
imageSegmentationService.segmentation
|
||||
.filterNotNull()
|
||||
.collect { result ->
|
||||
// TODO Should we really call toBinaryMask if it's used only in debug mode?
|
||||
val binaryMask = result.segmentation.toBinaryMask()
|
||||
val binaryMaskProvider = { ->
|
||||
var binaryMask: Bitmap = result.segmentation.toBinaryMask()
|
||||
if (result.rotationDegrees != 0) {
|
||||
binaryMask = rotateBitmap(binaryMask, result.rotationDegrees.toFloat())
|
||||
}
|
||||
binaryMask
|
||||
}
|
||||
|
||||
val rawQuad = detectDocumentQuad(
|
||||
result.segmentation,
|
||||
result.originalSize,
|
||||
isLiveAnalysis = true
|
||||
)?.rotate90(
|
||||
result.rotationDegrees / 90,
|
||||
result.segmentation.width,
|
||||
result.segmentation.height
|
||||
)
|
||||
|
||||
val stableQuad = quadStabilizer.update(rawQuad)
|
||||
_liveAnalysisState.value = LiveAnalysisState(
|
||||
inferenceTime = result.inferenceTime,
|
||||
binaryMask = binaryMask,
|
||||
binaryMaskProvider = binaryMaskProvider,
|
||||
maskSize = result.segmentation.maskSize(),
|
||||
documentQuad = rawQuad,
|
||||
stableQuad = stableQuad,
|
||||
)
|
||||
@@ -145,13 +159,13 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
|
||||
|
||||
private suspend fun processCapturedImage(
|
||||
source: Bitmap,
|
||||
rotationDegrees: Int
|
||||
rotationDegrees: Int,
|
||||
): CapturedPage? = withContext(Dispatchers.IO) {
|
||||
var result: CapturedPage? = null
|
||||
val segmentation = imageSegmentationService.runSegmentationAndReturn(source, 0)
|
||||
if (segmentation != null) {
|
||||
val mask = segmentation.segmentation
|
||||
val quad = detectDocumentQuad(mask, isLiveAnalysis = false)
|
||||
val quad = detectDocumentQuad(mask, segmentation.originalSize, isLiveAnalysis = false)
|
||||
if (quad != null) {
|
||||
val resizedQuad = quad.scaledTo(mask.width, mask.height, source.width, source.height)
|
||||
result = extractDocumentFromBitmap(source, resizedQuad, rotationDegrees, mask)
|
||||
@@ -230,3 +244,9 @@ fun toBitmap(bgr: Mat): Bitmap {
|
||||
rgba.release()
|
||||
return bmp
|
||||
}
|
||||
|
||||
/**
 * Builds a bitmap from the full extent of [source], transformed by a rotation of [angle] degrees.
 *
 * The rotation is expressed as a [Matrix] (via `postRotate`) and applied by
 * `Bitmap.createBitmap` with filtering enabled.
 *
 * @param source bitmap to rotate; it is neither modified nor recycled here
 * @param angle rotation in degrees, as interpreted by `Matrix.postRotate`
 * @return the bitmap produced by `Bitmap.createBitmap` for the rotated content
 */
fun rotateBitmap(source: Bitmap, angle: Float): Bitmap {
    val rotation = Matrix().apply { postRotate(angle) }
    return Bitmap.createBitmap(source, 0, 0, source.width, source.height, rotation, true)
}
|
||||
|
||||
Reference in New Issue
Block a user