Store page metadata after capture

This commit is contained in:
Pierre-Yves Nicolas
2026-01-09 15:24:15 +01:00
parent 0439971e57
commit fcdcea1891
11 changed files with 198 additions and 64 deletions

View File

@@ -103,7 +103,8 @@ class MainViewModel(val imageRepository: ImageRepository, launchMode: LaunchMode
viewModelScope.launch {
imageRepository.add(
compressJpeg(capturedPage.page, 75),
compressJpeg(capturedPage.source, 90)
compressJpeg(capturedPage.source, 90),
capturedPage.metadata,
)
_pageIds.value = imageRepository.imageIds()
}

View File

@@ -25,4 +25,21 @@ data class DocumentMetadata(
/**
 * Serialized description of one scanned page, stored in the document metadata file.
 *
 * Only [file] is mandatory: pages rebuilt from the files found on disk carry no
 * capture information, in which case [quad] and [isColored] stay `null`.
 *
 * NOTE(review): the rotate path of the repository copies a page changing only
 * [file]; confirm whether [rotationDegrees] is expected to follow that rotation.
 *
 * @property file name of the page image file; also used as the page identifier.
 * @property quad document quadrilateral recorded at capture time, if known.
 * @property rotationDegrees rotation recorded at capture time (defaults to 0).
 * @property isColored result of the color analysis at capture time, if known.
 */
@Serializable
data class Page(
    val file: String,
    val quad: NormalizedQuad? = null,
    val rotationDegrees: Int = 0,
    val isColored: Boolean? = null,
)
/**
 * Serializable counterpart of the image-processing quadrilateral: four corners,
 * each stored as a [PointD].
 *
 * @property topLeft top-left corner.
 * @property topRight top-right corner.
 * @property bottomRight bottom-right corner.
 * @property bottomLeft bottom-left corner.
 */
@Serializable
data class NormalizedQuad(
    val topLeft: PointD,
    val topRight: PointD,
    val bottomRight: PointD,
    val bottomLeft: PointD,
)
/**
 * Serializable 2D point with double-precision coordinates.
 *
 * @property x horizontal coordinate.
 * @property y vertical coordinate.
 */
@Serializable
data class PointD(
    val x: Double,
    val y: Double,
)

View File

@@ -14,9 +14,12 @@
*/
package org.fairscan.app.data
import kotlinx.collections.immutable.ImmutableList
import kotlinx.collections.immutable.toImmutableList
import kotlinx.collections.immutable.PersistentList
import kotlinx.collections.immutable.toPersistentList
import kotlinx.serialization.json.Json
import org.fairscan.app.domain.PageMetadata
import org.fairscan.imageprocessing.Point
import org.fairscan.imageprocessing.Quad
import java.io.File
const val SOURCE_DIR_NAME = "sources"
@@ -43,30 +46,33 @@ class ImageRepository(
private val metadataFile = File(scanDir, "document.json")
private var fileNames: MutableList<String> =
loadFileNames()
private var pages: MutableList<Page> = loadPages()
private fun loadFileNames(): MutableList<String> {
val filesOnDisk: Set<String> = scanDir.listFiles()
private fun loadPages(): MutableList<Page> {
val filesOnDisk = scanDir.listFiles()
?.filter { it.extension == "jpg" }
?.map { it.name }
?.toSet()
?: emptySet()
val metadataFiles: List<String>? = loadMetadata()
?.pages
?.map { it.file }
val metadataPages = loadMetadata()?.pages
return when {
metadataFiles != null -> metadataFiles
.filter { it in filesOnDisk }
.toMutableList()
else -> filesOnDisk
.sorted()
.toMutableList()
metadataPages != null ->
metadataPages
.filter { it.file in filesOnDisk }
.toMutableList()
else ->
filesOnDisk
.sorted()
.map { Page(file = it) }
.toMutableList()
}
}
/**
 * Returns the position in [pages] of the first page whose file name equals [id],
 * or -1 when no page matches.
 */
private fun indexOfPage(id: String): Int {
    for ((position, page) in pages.withIndex()) {
        if (page.file == id) return position
    }
    return -1
}
private fun loadMetadata(): DocumentMetadata? =
if (metadataFile.exists()) {
runCatching {
@@ -75,19 +81,33 @@ class ImageRepository(
} else null
private fun saveMetadata() {
val metadata = DocumentMetadata(version = 1, pages = fileNames.map { id -> Page(id) })
val metadata = DocumentMetadata(version = 1, pages = pages)
metadataFile.writeText(Json.encodeToString(metadata))
}
fun imageIds(): ImmutableList<String> = fileNames.toImmutableList()
fun imageIds(): PersistentList<String> =
pages.map { it.file }.toPersistentList()
fun add(pageBytes: ByteArray, sourceBytes: ByteArray? = null) {
/**
 * Looks up the capture metadata of the page identified by [id].
 *
 * @return the metadata, or `null` when the page is unknown or when its stored
 * entry cannot be converted by [toMetadata].
 */
fun getPageMetadata(id: String): PageMetadata? =
    // indexOfPage yields -1 for an unknown id, which getOrNull maps to null.
    pages.getOrNull(indexOfPage(id))?.toMetadata()
fun add(pageBytes: ByteArray, sourceBytes: ByteArray, metadata: PageMetadata) {
val fileName = "${System.currentTimeMillis()}.jpg"
val file = File(scanDir, fileName)
file.writeBytes(pageBytes)
writeThumbnail(file)
sourceBytes?.let { File(sourceDir, fileName).writeBytes(sourceBytes) }
fileNames.add(fileName)
File(sourceDir, fileName).writeBytes(sourceBytes)
pages.add(
Page(
file = fileName,
quad = metadata.normalizedQuad.toSerializable(),
rotationDegrees = metadata.rotationDegrees,
isColored = metadata.isColored
)
)
saveMetadata()
}
@@ -106,9 +126,10 @@ class ImageRepository(
val rotatedFile = File(scanDir, rotatedId)
transformations.rotate(originalFile, rotatedFile, clockwise)
if (rotatedFile.exists()) {
val index = fileNames.indexOf(id)
val index = indexOfPage(id)
if (index >= 0) {
fileNames[index] = rotatedId
val oldPage = pages[index]
pages[index] = oldPage.copy(file = rotatedId)
saveMetadata()
}
delete(id)
@@ -143,21 +164,24 @@ class ImageRepository(
private fun getThumbnailFile(id: String): File = File(thumbnailDir, id)
fun movePage(id: String, newIndex: Int) {
if (!fileNames.remove(id)) return
val safeIndex = newIndex.coerceIn(0, fileNames.size)
fileNames.add(safeIndex, id)
val index = indexOfPage(id)
if (index < 0) return
val page = pages.removeAt(index)
val safeIndex = newIndex.coerceIn(0, pages.size)
pages.add(safeIndex, page)
saveMetadata()
}
fun delete(id: String) {
File(scanDir, id).delete()
getThumbnailFile(id).delete()
fileNames.remove(id)
pages.removeAll { it.file == id }
saveMetadata()
}
fun clear() {
fileNames.clear()
pages.clear()
thumbnailDir.listFiles()?.forEach {
file -> file.delete()
}
@@ -170,3 +194,24 @@ class ImageRepository(
saveMetadata() // "empty" json file
}
}
/**
 * Converts this [Quad] into its serializable form, copying the x/y coordinates
 * of each corner into a [PointD].
 */
fun Quad.toSerializable(): NormalizedQuad {
    val tl = PointD(topLeft.x, topLeft.y)
    val tr = PointD(topRight.x, topRight.y)
    val br = PointD(bottomRight.x, bottomRight.y)
    val bl = PointD(bottomLeft.x, bottomLeft.y)
    return NormalizedQuad(topLeft = tl, topRight = tr, bottomRight = br, bottomLeft = bl)
}
/**
 * Converts this serialized quadrilateral back into a [Quad], corner by corner
 * (top-left, top-right, bottom-right, bottom-left).
 */
fun NormalizedQuad.toQuad(): Quad {
    // Local helper: one stored corner -> one image-processing point.
    fun PointD.toPoint() = Point(x, y)

    return Quad(
        topLeft.toPoint(),
        topRight.toPoint(),
        bottomRight.toPoint(),
        bottomLeft.toPoint(),
    )
}
/**
 * Builds the domain-level [PageMetadata] for this stored page.
 *
 * @return the metadata, or `null` when either the quad or the color flag is
 * missing from the stored entry.
 */
fun Page.toMetadata(): PageMetadata? {
    val storedQuad = quad ?: return null
    val colored = isColored ?: return null
    return PageMetadata(storedQuad.toQuad(), rotationDegrees, colored)
}

View File

@@ -19,5 +19,6 @@ import android.graphics.Bitmap
data class CapturedPage(
val page: Bitmap,
val source: Bitmap
val source: Bitmap,
val metadata: PageMetadata,
)

View File

@@ -0,0 +1,23 @@
/*
* Copyright 2025 Pierre-Yves Nicolas
*
* This program is free software: you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.fairscan.app.domain
import org.fairscan.imageprocessing.Quad
/**
 * Information recorded about a page when it is captured.
 *
 * @property normalizedQuad quadrilateral of the document in the source image
 * (presumably in normalized coordinates — confirm against the capture code).
 * @property rotationDegrees rotation associated with the capture, in degrees.
 * @property isColored whether the page was detected as colored.
 */
data class PageMetadata(
    val normalizedQuad: Quad,
    val rotationDegrees: Int,
    val isColored: Boolean,
)

View File

@@ -85,6 +85,7 @@ import kotlinx.coroutines.delay
import org.fairscan.app.MainViewModel
import org.fairscan.app.R
import org.fairscan.app.domain.CapturedPage
import org.fairscan.app.domain.PageMetadata
import org.fairscan.app.ui.Navigation
import org.fairscan.app.ui.Screen
import org.fairscan.app.ui.components.CameraPermissionState
@@ -95,6 +96,8 @@ import org.fairscan.app.ui.components.pageCountText
import org.fairscan.app.ui.dummyNavigation
import org.fairscan.app.ui.fakeDocument
import org.fairscan.app.ui.theme.FairScanTheme
import org.fairscan.imageprocessing.Point
import org.fairscan.imageprocessing.Quad
const val CAPTURED_IMAGE_DISPLAY_DURATION = 1500L
const val ANIMATION_DURATION = 200
@@ -461,11 +464,14 @@ fun CameraScreenPreview() {
@Preview(showBackground = true, showSystemUi = true)
@Composable
fun CameraScreenPreviewWithProcessedImage() {
val p = Point(0 , 0)
val quad = Quad(p, p, p, p)
ScreenPreview(CaptureState.CapturePreview(
debugImage("uncropped/img01.jpg"),
CapturedPage(
debugImage("gallica.bnf.fr-bpt6k5530456s-1.jpg"),
debugImage("gallica.bnf.fr-bpt6k5530456s-1.jpg"))))
debugImage("gallica.bnf.fr-bpt6k5530456s-1.jpg"),
PageMetadata(quad, 0, false))))
}
@Preview(showBackground = true, widthDp = 640, heightDp = 320)

View File

@@ -32,6 +32,7 @@ import kotlinx.coroutines.launch
import kotlinx.coroutines.withContext
import org.fairscan.app.AppContainer
import org.fairscan.app.domain.CapturedPage
import org.fairscan.app.domain.PageMetadata
import org.fairscan.imageprocessing.Mask
import org.fairscan.imageprocessing.Quad
import org.fairscan.imageprocessing.detectDocumentQuad
@@ -118,9 +119,9 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
if (imageProxy != null) {
viewModelScope.launch {
val source = imageProxy.toBitmap()
val processed = processCapturedImage(source, imageProxy.imageInfo.rotationDegrees)
val page = processCapturedImage(source, imageProxy.imageInfo.rotationDegrees)
imageProxy.close()
onCaptureProcessed(processed?.let { CapturedPage(processed, source) })
onCaptureProcessed(page)
}
} else {
onCaptureProcessed(null)
@@ -128,11 +129,11 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
}
private suspend fun processCapturedImage(
bitmap: Bitmap,
source: Bitmap,
rotationDegrees: Int
): Bitmap? = withContext(Dispatchers.IO) {
var corrected: Bitmap? = null
val segmentation = imageSegmentationService.runSegmentationAndReturn(bitmap, 0)
): CapturedPage? = withContext(Dispatchers.IO) {
var result: CapturedPage? = null
val segmentation = imageSegmentationService.runSegmentationAndReturn(source, 0)
if (segmentation != null) {
val mask = segmentation.segmentation
var quad = detectDocumentQuad(mask, isLiveAnalysis = false)
@@ -152,11 +153,11 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() {
}
}
if (quad != null) {
val resizedQuad = quad.scaledTo(mask.width, mask.height, bitmap.width, bitmap.height)
corrected = extractDocumentFromBitmap(bitmap, resizedQuad, rotationDegrees, mask)
val resizedQuad = quad.scaledTo(mask.width, mask.height, source.width, source.height)
result = extractDocumentFromBitmap(source, resizedQuad, rotationDegrees, mask)
}
}
return@withContext corrected
return@withContext result
}
fun addProcessedImage() {
@@ -187,17 +188,22 @@ sealed class CaptureState {
) : CaptureState()
}
fun extractDocumentFromBitmap(image: Bitmap, quad: Quad, rotationDegrees: Int, mask: Mask): Bitmap {
fun extractDocumentFromBitmap(
source: Bitmap, quad: Quad, rotationDegrees: Int, mask: Mask
): CapturedPage {
val rgba = Mat()
Utils.bitmapToMat(image, rgba)
Utils.bitmapToMat(source, rgba)
val bgr = Mat()
Imgproc.cvtColor(rgba, bgr, Imgproc.COLOR_RGBA2BGR) // CV_8UC4 → CV_8UC3
rgba.release()
val outBgr = extractDocument(bgr, quad, rotationDegrees, mask)
val page = extractDocument(bgr, quad, rotationDegrees, mask)
val outBgr = page.image
bgr.release()
val outBitmap = toBitmap(outBgr)
outBgr.release()
return outBitmap
val normalizedQuad = quad.scaledTo(source.width, source.height, 1, 1)
val metadata = PageMetadata(normalizedQuad, rotationDegrees, page.pageAnalysis.isColored)
return CapturedPage(outBitmap, source, metadata)
}
fun toBitmap(bgr: Mat): Bitmap {

View File

@@ -15,7 +15,9 @@
package org.fairscan.app.data
import org.assertj.core.api.Assertions.assertThat
import org.fairscan.app.data.ImageTransformations
import org.fairscan.app.domain.PageMetadata
import org.fairscan.imageprocessing.Point
import org.fairscan.imageprocessing.Quad
import org.junit.Rule
import org.junit.Test
import org.junit.rules.TemporaryFolder
@@ -28,6 +30,9 @@ class ImageRepositoryTest {
private var _filesDir: File? = null
val quad1 = Quad(Point(.01, .02), Point(.1, .03), Point(.11, .12), Point(.03, .09))
val metadata1 = PageMetadata(quad1, 90, true)
fun getFilesDir(): File {
if (_filesDir == null) {
_filesDir = folder.newFolder("files_dir")
@@ -52,17 +57,25 @@ class ImageRepositoryTest {
val repo = repo()
assertThat(repo.imageIds()).isEmpty()
val bytes = byteArrayOf(101, 102, 103)
repo.add(bytes)
repo.add(bytes, byteArrayOf(51), metadata1)
assertThat(repo.imageIds()).hasSize(1)
assertThat(repo.getContent(repo.imageIds()[0])).isEqualTo(bytes)
assertThat(repo.getThumbnail(repo.imageIds()[0])).isEqualTo(byteArrayOf(101))
val id = repo.imageIds()[0]
assertThat(repo.getContent(id)).isEqualTo(bytes)
assertThat(repo.getThumbnail(id)).isEqualTo(byteArrayOf(101))
assertThat(repo().getPageMetadata("x")).isNull()
val metadata = repo.getPageMetadata(id)
assertThat(metadata).isNotNull()
assertThat(metadata!!.normalizedQuad).isEqualTo(quad1)
assertThat(metadata.rotationDegrees).isEqualTo(metadata1.rotationDegrees)
assertThat(metadata.isColored).isEqualTo(metadata1.isColored)
}
@Test
fun delete_image() {
val repo = repo()
val bytes = byteArrayOf(101, 102, 103)
repo.add(bytes)
repo.add(bytes, byteArrayOf(51), metadata1)
assertThat(repo.imageIds()).hasSize(1)
repo.delete(repo.imageIds()[0])
assertThat(repo.imageIds()).isEmpty()
@@ -106,7 +119,7 @@ class ImageRepositoryTest {
fun `clear should delete pages`() {
val bytes = byteArrayOf(101, 102, 103)
val repo1 = repo()
repo1.add(bytes)
repo1.add(bytes, byteArrayOf(51), metadata1)
assertThat(repo1.imageIds()).isNotEmpty()
repo1.clear()
assertThat(repo1.imageIds()).isEmpty()
@@ -123,7 +136,7 @@ class ImageRepositoryTest {
@Test
fun rotate() {
val repo = repo()
repo.add(byteArrayOf(101, 102, 103))
repo.add(byteArrayOf(101, 102, 103), byteArrayOf(51), metadata1)
val id0 = repo.imageIds().last()
val baseId = id0.substring(0, id0.length - 4)
@@ -151,9 +164,9 @@ class ImageRepositoryTest {
@Test
fun movePage() {
val repo = repo()
repo.add(byteArrayOf(101))
repo.add(byteArrayOf(101), byteArrayOf(51), metadata1)
Thread.sleep(1L) // to avoid file name clashes
repo.add(byteArrayOf(110))
repo.add(byteArrayOf(110), byteArrayOf(51), metadata1)
val id0 = repo.imageIds().first()
val id1 = repo.imageIds().last()
repo.movePage(id1, 0)
@@ -162,4 +175,18 @@ class ImageRepositoryTest {
val repo2 = repo()
assertThat(repo2.imageIds()).containsExactly(id1, id0)
}
@Test
fun metadata() {
    val quad = quad1.toSerializable()

    // A page without a quad or without a color flag has no usable metadata.
    assertThat(Page("f1", null, 0, true).toMetadata()).isNull()
    assertThat(Page("f1", quad, 0, null).toMetadata()).isNull()

    // Use a rotation different from Page's default (0) so that a conversion
    // dropping or swapping a field cannot pass unnoticed.
    val rotation = 90
    listOf(true, false).forEach { isColored ->
        val metadata = Page("f1", quad, rotation, isColored).toMetadata()
        assertThat(metadata).isNotNull()
        assertThat(metadata!!.normalizedQuad).isEqualTo(quad1)
        assertThat(metadata.rotationDegrees).isEqualTo(rotation)
        assertThat(metadata.isColored).isEqualTo(isColored)
    }
}
}

View File

@@ -14,11 +14,10 @@
*/
package org.fairscan.evaluation
import org.fairscan.imageprocessing.ExtractedDocument
import org.fairscan.imageprocessing.detectDocumentQuad
import org.fairscan.imageprocessing.extractDocument
import org.fairscan.imageprocessing.isColoredDocument
import org.fairscan.imageprocessing.scaledTo
import org.opencv.core.Mat
import org.opencv.imgcodecs.Imgcodecs
import java.io.File
@@ -61,11 +60,11 @@ object ColorDetectionEvaluator {
val quad = detectDocumentQuad(mask, isLiveAnalysis = false)
?.scaledTo(mask.width, mask.height, mat.width(), mat.height())
val document: Mat = if (quad != null) {
val extracted: ExtractedDocument = if (quad != null) {
extractDocument(mat, quad, 0, mask)
} else continue
val detected = isColoredDocument(mat, mask, quad)
val detected = extracted.pageAnalysis.isColored
nbProcessedImages++
@@ -73,7 +72,7 @@ object ColorDetectionEvaluator {
Imgcodecs.imwrite(inputOut.absolutePath, mat)
val outputOut = File(outputDir, "${imgName}_output.jpg")
Imgcodecs.imwrite(outputOut.absolutePath, document)
Imgcodecs.imwrite(outputOut.absolutePath, extracted.image)
results += ColorResult(
imgName,

View File

@@ -71,7 +71,7 @@ object DatasetEvaluator {
?.scaledTo(mask.width, mask.height, inputMat.width(), inputMat.height())
val corrected: Mat? = if (quad != null) {
extractDocument(inputMat, quad = quad, rotationDegrees = 0, mask)
extractDocument(inputMat, quad = quad, rotationDegrees = 0, mask).image
} else null
val inputOut = File(outputDir, "${e.name}_input.jpg")

View File

@@ -32,6 +32,15 @@ interface Mask {
fun toMat(): Mat
}
/**
 * Analysis results computed for a page while its document is extracted.
 *
 * @property isColored whether the page was classified as colored.
 */
data class PageAnalysis(val isColored: Boolean)
/**
 * Outcome of a document extraction: the resulting image together with the
 * analysis computed for the page.
 *
 * @property image the extracted document image.
 * @property pageAnalysis analysis results associated with [image].
 */
data class ExtractedDocument(
    val image: Mat,
    val pageAnalysis: PageAnalysis,
)
fun detectDocumentQuad(mask: Mask, isLiveAnalysis: Boolean, minQuadAreaRatio: Double = 0.02): Quad? {
val mat = mask.toMat()
val (biggest: MatOfPoint2f?, area) = biggestContour(mat)
@@ -117,7 +126,7 @@ fun extractDocument(
quad: Quad,
rotationDegrees: Int,
mask: Mask,
): Mat {
): ExtractedDocument {
val widthTop = norm(quad.topLeft, quad.topRight)
val widthBottom = norm(quad.bottomLeft, quad.bottomRight)
val targetWidth = (widthTop + widthBottom) / 2
@@ -134,14 +143,14 @@ fun extractDocument(
)
val dstPoints = MatOfPoint2f(
org.opencv.core.Point(0.0, 0.0),
org.opencv.core.Point(targetWidth.toDouble(), 0.0),
org.opencv.core.Point(targetWidth.toDouble(), targetHeight.toDouble()),
org.opencv.core.Point(0.0, targetHeight.toDouble())
org.opencv.core.Point(targetWidth, 0.0),
org.opencv.core.Point(targetWidth, targetHeight),
org.opencv.core.Point(0.0, targetHeight)
)
val transform = Imgproc.getPerspectiveTransform(srcPoints, dstPoints)
val outputMat = Mat()
val outputSize = Size(targetWidth.toDouble(), targetHeight.toDouble())
val outputSize = Size(targetWidth, targetHeight)
Imgproc.warpPerspective(inputMat, outputMat, transform, outputSize)
val resized = resize(outputMat, 1500.0)
@@ -149,7 +158,7 @@ fun extractDocument(
val enhanced = enhanceCapturedImage(resized, isColored)
val rotated = rotate(enhanced, rotationDegrees)
return rotated
return ExtractedDocument(rotated, PageAnalysis(isColored))
}
fun resize(original: Mat, targetMax: Double): Mat {