diff --git a/app/src/main/java/org/fairscan/app/MainViewModel.kt b/app/src/main/java/org/fairscan/app/MainViewModel.kt index 0a636c8..4bd695c 100644 --- a/app/src/main/java/org/fairscan/app/MainViewModel.kt +++ b/app/src/main/java/org/fairscan/app/MainViewModel.kt @@ -103,7 +103,8 @@ class MainViewModel(val imageRepository: ImageRepository, launchMode: LaunchMode viewModelScope.launch { imageRepository.add( compressJpeg(capturedPage.page, 75), - compressJpeg(capturedPage.source, 90) + compressJpeg(capturedPage.source, 90), + capturedPage.metadata, ) _pageIds.value = imageRepository.imageIds() } diff --git a/app/src/main/java/org/fairscan/app/data/DocumentMetadata.kt b/app/src/main/java/org/fairscan/app/data/DocumentMetadata.kt index 1d8478f..015bbc8 100644 --- a/app/src/main/java/org/fairscan/app/data/DocumentMetadata.kt +++ b/app/src/main/java/org/fairscan/app/data/DocumentMetadata.kt @@ -25,4 +25,21 @@ data class DocumentMetadata( @Serializable data class Page( val file: String, + val quad: NormalizedQuad? = null, + val rotationDegrees: Int = 0, + val isColored: Boolean? = null +) + +@Serializable +data class NormalizedQuad( + val topLeft: PointD, + val topRight: PointD, + val bottomRight: PointD, + val bottomLeft: PointD +) + +@Serializable +data class PointD( + val x: Double, + val y: Double ) diff --git a/app/src/main/java/org/fairscan/app/data/ImageRepository.kt b/app/src/main/java/org/fairscan/app/data/ImageRepository.kt index 7e31976..7b92d16 100644 --- a/app/src/main/java/org/fairscan/app/data/ImageRepository.kt +++ b/app/src/main/java/org/fairscan/app/data/ImageRepository.kt @@ -14,9 +14,12 @@ */ package org.fairscan.app.data -import kotlinx.collections.immutable.ImmutableList -import kotlinx.collections.immutable.toImmutableList +import kotlinx.collections.immutable.PersistentList +import kotlinx.collections.immutable.toPersistentList import kotlinx.serialization.json.Json +import org.fairscan.app.domain.PageMetadata +import org.fairscan.imageprocessing.Point +import org.fairscan.imageprocessing.Quad import java.io.File const val SOURCE_DIR_NAME = "sources" @@ -43,30 +46,33 @@ class ImageRepository( private val metadataFile = File(scanDir, "document.json") - private var fileNames: MutableList = - loadFileNames() + private var pages: MutableList = loadPages() - private fun loadFileNames(): MutableList { - val filesOnDisk: Set = scanDir.listFiles() + private fun loadPages(): MutableList { + val filesOnDisk = scanDir.listFiles() ?.filter { it.extension == "jpg" } ?.map { it.name } ?.toSet() ?: emptySet() - val metadataFiles: List? = loadMetadata() - ?.pages - ?.map { it.file } + val metadataPages = loadMetadata()?.pages return when { - metadataFiles != null -> metadataFiles - .filter { it in filesOnDisk } - .toMutableList() - else -> filesOnDisk - .sorted() - .toMutableList() + metadataPages != null -> + metadataPages + .filter { it.file in filesOnDisk } + .toMutableList() + else -> + filesOnDisk + .sorted() + .map { Page(file = it) } + .toMutableList() } } + private fun indexOfPage(id: String): Int = + pages.indexOfFirst { it.file == id } + private fun loadMetadata(): DocumentMetadata? = if (metadataFile.exists()) { runCatching { @@ -75,19 +81,33 @@ class ImageRepository( } else null private fun saveMetadata() { - val metadata = DocumentMetadata(version = 1, pages = fileNames.map { id -> Page(id) }) + val metadata = DocumentMetadata(version = 1, pages = pages) metadataFile.writeText(Json.encodeToString(metadata)) } - fun imageIds(): ImmutableList = fileNames.toImmutableList() + fun imageIds(): PersistentList = + pages.map { it.file }.toPersistentList() - fun add(pageBytes: ByteArray, sourceBytes: ByteArray? = null) { + fun getPageMetadata(id: String): PageMetadata? { + val index = indexOfPage(id) + if (index < 0) return null + return pages[index].toMetadata() + } + + fun add(pageBytes: ByteArray, sourceBytes: ByteArray, metadata: PageMetadata) { val fileName = "${System.currentTimeMillis()}.jpg" val file = File(scanDir, fileName) file.writeBytes(pageBytes) writeThumbnail(file) - sourceBytes?.let { File(sourceDir, fileName).writeBytes(sourceBytes) } - fileNames.add(fileName) + File(sourceDir, fileName).writeBytes(sourceBytes) + pages.add( + Page( + file = fileName, + quad = metadata.normalizedQuad.toSerializable(), + rotationDegrees = metadata.rotationDegrees, + isColored = metadata.isColored + ) + ) saveMetadata() } @@ -106,9 +126,10 @@ class ImageRepository( val rotatedFile = File(scanDir, rotatedId) transformations.rotate(originalFile, rotatedFile, clockwise) if (rotatedFile.exists()) { - val index = fileNames.indexOf(id) + val index = indexOfPage(id) if (index >= 0) { - fileNames[index] = rotatedId + val oldPage = pages[index] + pages[index] = oldPage.copy(file = rotatedId) saveMetadata() } delete(id) @@ -143,21 +164,24 @@ class ImageRepository( private fun getThumbnailFile(id: String): File = File(thumbnailDir, id) fun movePage(id: String, newIndex: Int) { - if (!fileNames.remove(id)) return - val safeIndex = newIndex.coerceIn(0, fileNames.size) - fileNames.add(safeIndex, id) + val index = indexOfPage(id) + if (index < 0) return + + val page = pages.removeAt(index) + val safeIndex = newIndex.coerceIn(0, pages.size) + pages.add(safeIndex, page) saveMetadata() } fun delete(id: String) { File(scanDir, id).delete() getThumbnailFile(id).delete() - fileNames.remove(id) + pages.removeAll { it.file == id } saveMetadata() } fun clear() { - fileNames.clear() + pages.clear() thumbnailDir.listFiles()?.forEach { file -> file.delete() } @@ -170,3 +194,24 @@ class ImageRepository( saveMetadata() // "empty" json file } } + +fun Quad.toSerializable(): NormalizedQuad = + NormalizedQuad( + topLeft = PointD(topLeft.x, topLeft.y), + topRight = PointD(topRight.x, topRight.y), + bottomRight = PointD(bottomRight.x, bottomRight.y), + bottomLeft = PointD(bottomLeft.x, bottomLeft.y) + ) + +fun NormalizedQuad.toQuad(): Quad = + Quad( + Point(topLeft.x, topLeft.y), + Point(topRight.x, topRight.y), + Point(bottomRight.x, bottomRight.y), + Point(bottomLeft.x, bottomLeft.y) +) + +fun Page.toMetadata(): PageMetadata? { + if (quad == null || isColored == null) return null + return PageMetadata(quad.toQuad(), rotationDegrees, isColored) +} diff --git a/app/src/main/java/org/fairscan/app/domain/CapturedPage.kt b/app/src/main/java/org/fairscan/app/domain/CapturedPage.kt index 352dcf5..6e8d7c7 100644 --- a/app/src/main/java/org/fairscan/app/domain/CapturedPage.kt +++ b/app/src/main/java/org/fairscan/app/domain/CapturedPage.kt @@ -19,5 +19,6 @@ import android.graphics.Bitmap data class CapturedPage( val page: Bitmap, - val source: Bitmap + val source: Bitmap, + val metadata: PageMetadata, ) diff --git a/app/src/main/java/org/fairscan/app/domain/PageMetadata.kt b/app/src/main/java/org/fairscan/app/domain/PageMetadata.kt new file mode 100644 index 0000000..a60b7f8 --- /dev/null +++ b/app/src/main/java/org/fairscan/app/domain/PageMetadata.kt @@ -0,0 +1,23 @@ +/* + * Copyright 2025 Pierre-Yves Nicolas + * + * This program is free software: you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ +package org.fairscan.app.domain + +import org.fairscan.imageprocessing.Quad + +data class PageMetadata( + val normalizedQuad: Quad, + val rotationDegrees: Int, + val isColored: Boolean, +) diff --git a/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraScreen.kt b/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraScreen.kt index 5c1d7b0..dbe1198 100644 --- a/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraScreen.kt +++ b/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraScreen.kt @@ -85,6 +85,7 @@ import kotlinx.coroutines.delay import org.fairscan.app.MainViewModel import org.fairscan.app.R import org.fairscan.app.domain.CapturedPage +import org.fairscan.app.domain.PageMetadata import org.fairscan.app.ui.Navigation import org.fairscan.app.ui.Screen import org.fairscan.app.ui.components.CameraPermissionState @@ -95,6 +96,8 @@ import org.fairscan.app.ui.components.pageCountText import org.fairscan.app.ui.dummyNavigation import org.fairscan.app.ui.fakeDocument import org.fairscan.app.ui.theme.FairScanTheme +import org.fairscan.imageprocessing.Point +import org.fairscan.imageprocessing.Quad const val CAPTURED_IMAGE_DISPLAY_DURATION = 1500L const val ANIMATION_DURATION = 200 @@ -461,11 +464,14 @@ fun CameraScreenPreview() { @Preview(showBackground = true, showSystemUi = true) @Composable fun CameraScreenPreviewWithProcessedImage() { + val p = Point(0 , 0) + val quad = Quad(p, p, p, p) ScreenPreview(CaptureState.CapturePreview( debugImage("uncropped/img01.jpg"), CapturedPage( debugImage("gallica.bnf.fr-bpt6k5530456s-1.jpg"), - debugImage("gallica.bnf.fr-bpt6k5530456s-1.jpg")))) + debugImage("gallica.bnf.fr-bpt6k5530456s-1.jpg"), + PageMetadata(quad, 0, false)))) } @Preview(showBackground = true, widthDp = 640, heightDp = 320) diff --git a/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraViewModel.kt b/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraViewModel.kt index cf75845..0574ca6 100644 --- a/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraViewModel.kt +++ b/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraViewModel.kt @@ -32,6 +32,7 @@ import kotlinx.coroutines.launch import kotlinx.coroutines.withContext import org.fairscan.app.AppContainer import org.fairscan.app.domain.CapturedPage +import org.fairscan.app.domain.PageMetadata import org.fairscan.imageprocessing.Mask import org.fairscan.imageprocessing.Quad import org.fairscan.imageprocessing.detectDocumentQuad @@ -118,9 +119,9 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() { if (imageProxy != null) { viewModelScope.launch { val source = imageProxy.toBitmap() - val processed = processCapturedImage(source, imageProxy.imageInfo.rotationDegrees) + val page = processCapturedImage(source, imageProxy.imageInfo.rotationDegrees) imageProxy.close() - onCaptureProcessed(processed?.let { CapturedPage(processed, source) }) + onCaptureProcessed(page) } } else { onCaptureProcessed(null) @@ -128,11 +129,11 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() { } private suspend fun processCapturedImage( - bitmap: Bitmap, + source: Bitmap, rotationDegrees: Int - ): Bitmap? = withContext(Dispatchers.IO) { - var corrected: Bitmap? = null - val segmentation = imageSegmentationService.runSegmentationAndReturn(bitmap, 0) + ): CapturedPage? = withContext(Dispatchers.IO) { + var result: CapturedPage? = null + val segmentation = imageSegmentationService.runSegmentationAndReturn(source, 0) if (segmentation != null) { val mask = segmentation.segmentation var quad = detectDocumentQuad(mask, isLiveAnalysis = false) @@ -152,11 +153,11 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() { } } if (quad != null) { - val resizedQuad = quad.scaledTo(mask.width, mask.height, bitmap.width, bitmap.height) - corrected = extractDocumentFromBitmap(bitmap, resizedQuad, rotationDegrees, mask) + val resizedQuad = quad.scaledTo(mask.width, mask.height, source.width, source.height) + result = extractDocumentFromBitmap(source, resizedQuad, rotationDegrees, mask) } } - return@withContext corrected + return@withContext result } fun addProcessedImage() { @@ -187,17 +188,22 @@ sealed class CaptureState { ) : CaptureState() } -fun extractDocumentFromBitmap(image: Bitmap, quad: Quad, rotationDegrees: Int, mask: Mask): Bitmap { +fun extractDocumentFromBitmap( + source: Bitmap, quad: Quad, rotationDegrees: Int, mask: Mask +): CapturedPage { val rgba = Mat() - Utils.bitmapToMat(image, rgba) + Utils.bitmapToMat(source, rgba) val bgr = Mat() Imgproc.cvtColor(rgba, bgr, Imgproc.COLOR_RGBA2BGR) // CV_8UC4 → CV_8UC3 rgba.release() - val outBgr = extractDocument(bgr, quad, rotationDegrees, mask) + val page = extractDocument(bgr, quad, rotationDegrees, mask) + val outBgr = page.image bgr.release() val outBitmap = toBitmap(outBgr) outBgr.release() - return outBitmap + val normalizedQuad = quad.scaledTo(source.width, source.height, 1, 1) + val metadata = PageMetadata(normalizedQuad, rotationDegrees, page.pageAnalysis.isColored) + return CapturedPage(outBitmap, source, metadata) } fun toBitmap(bgr: Mat): Bitmap { diff --git a/app/src/test/java/org/fairscan/app/data/ImageRepositoryTest.kt b/app/src/test/java/org/fairscan/app/data/ImageRepositoryTest.kt index fe1ad45..52e8aef 100644 --- a/app/src/test/java/org/fairscan/app/data/ImageRepositoryTest.kt +++ b/app/src/test/java/org/fairscan/app/data/ImageRepositoryTest.kt @@ -15,7 +15,9 @@ package org.fairscan.app.data import org.assertj.core.api.Assertions.assertThat -import org.fairscan.app.data.ImageTransformations +import org.fairscan.app.domain.PageMetadata +import org.fairscan.imageprocessing.Point +import org.fairscan.imageprocessing.Quad import org.junit.Rule import org.junit.Test import org.junit.rules.TemporaryFolder @@ -28,6 +30,9 @@ class ImageRepositoryTest { private var _filesDir: File? = null + val quad1 = Quad(Point(.01, .02), Point(.1, .03), Point(.11, .12), Point(.03, .09)) + val metadata1 = PageMetadata(quad1, 90, true) + fun getFilesDir(): File { if (_filesDir == null) { _filesDir = folder.newFolder("files_dir") @@ -52,17 +57,25 @@ class ImageRepositoryTest { val repo = repo() assertThat(repo.imageIds()).isEmpty() val bytes = byteArrayOf(101, 102, 103) - repo.add(bytes) + repo.add(bytes, byteArrayOf(51), metadata1) assertThat(repo.imageIds()).hasSize(1) - assertThat(repo.getContent(repo.imageIds()[0])).isEqualTo(bytes) - assertThat(repo.getThumbnail(repo.imageIds()[0])).isEqualTo(byteArrayOf(101)) + val id = repo.imageIds()[0] + assertThat(repo.getContent(id)).isEqualTo(bytes) + assertThat(repo.getThumbnail(id)).isEqualTo(byteArrayOf(101)) + + assertThat(repo().getPageMetadata("x")).isNull() + val metadata = repo.getPageMetadata(id) + assertThat(metadata).isNotNull() + assertThat(metadata!!.normalizedQuad).isEqualTo(quad1) + assertThat(metadata.rotationDegrees).isEqualTo(metadata1.rotationDegrees) + assertThat(metadata.isColored).isEqualTo(metadata1.isColored) } @Test fun delete_image() { val repo = repo() val bytes = byteArrayOf(101, 102, 103) - repo.add(bytes) + repo.add(bytes, byteArrayOf(51), metadata1) assertThat(repo.imageIds()).hasSize(1) repo.delete(repo.imageIds()[0]) assertThat(repo.imageIds()).isEmpty() @@ -106,7 +119,7 @@ class ImageRepositoryTest { fun `clear should delete pages`() { val bytes = byteArrayOf(101, 102, 103) val repo1 = repo() - repo1.add(bytes) + repo1.add(bytes, byteArrayOf(51), metadata1) assertThat(repo1.imageIds()).isNotEmpty() repo1.clear() assertThat(repo1.imageIds()).isEmpty() @@ -123,7 +136,7 @@ class ImageRepositoryTest { @Test fun rotate() { val repo = repo() - repo.add(byteArrayOf(101, 102, 103)) + repo.add(byteArrayOf(101, 102, 103), byteArrayOf(51), metadata1) val id0 = repo.imageIds().last() val baseId = id0.substring(0, id0.length - 4) @@ -151,9 +164,9 @@ class ImageRepositoryTest { @Test fun movePage() { val repo = repo() - repo.add(byteArrayOf(101)) + repo.add(byteArrayOf(101), byteArrayOf(51), metadata1) Thread.sleep(1L) // to avoid file name clashes - repo.add(byteArrayOf(110)) + repo.add(byteArrayOf(110), byteArrayOf(51), metadata1) val id0 = repo.imageIds().first() val id1 = repo.imageIds().last() repo.movePage(id1, 0) @@ -162,4 +175,18 @@ class ImageRepositoryTest { val repo2 = repo() assertThat(repo2.imageIds()).containsExactly(id1, id0) } + + @Test + fun metadata() { + val quad = quad1.toSerializable() + + assertThat(Page("f1", null, 0, true).toMetadata()).isNull() + assertThat(Page("f1", quad, 0, null).toMetadata()).isNull() + + listOf(true, false).forEach { isColored -> + val metadata = Page("f1", quad, 0, isColored).toMetadata() + assertThat(metadata).isNotNull() + assertThat(metadata!!.isColored).isEqualTo(isColored) + } + } } diff --git a/evaluation/src/main/java/org/fairscan/evaluation/ColorDetectionEvaluator.kt b/evaluation/src/main/java/org/fairscan/evaluation/ColorDetectionEvaluator.kt index b7c8a3c..90493cc 100644 --- a/evaluation/src/main/java/org/fairscan/evaluation/ColorDetectionEvaluator.kt +++ b/evaluation/src/main/java/org/fairscan/evaluation/ColorDetectionEvaluator.kt @@ -14,11 +14,10 @@ */ package org.fairscan.evaluation +import org.fairscan.imageprocessing.ExtractedDocument import org.fairscan.imageprocessing.detectDocumentQuad import org.fairscan.imageprocessing.extractDocument -import org.fairscan.imageprocessing.isColoredDocument import org.fairscan.imageprocessing.scaledTo -import org.opencv.core.Mat import org.opencv.imgcodecs.Imgcodecs import java.io.File @@ -61,11 +60,11 @@ object ColorDetectionEvaluator { val quad = detectDocumentQuad(mask, isLiveAnalysis = false) ?.scaledTo(mask.width, mask.height, mat.width(), mat.height()) - val document: Mat = if (quad != null) { + val extracted: ExtractedDocument = if (quad != null) { extractDocument(mat, quad, 0, mask) } else continue - val detected = isColoredDocument(mat, mask, quad) + val detected = extracted.pageAnalysis.isColored nbProcessedImages++ @@ -73,7 +72,7 @@ object ColorDetectionEvaluator { Imgcodecs.imwrite(inputOut.absolutePath, mat) val outputOut = File(outputDir, "${imgName}_output.jpg") - Imgcodecs.imwrite(outputOut.absolutePath, document) + Imgcodecs.imwrite(outputOut.absolutePath, extracted.image) results += ColorResult( imgName, diff --git a/evaluation/src/main/java/org/fairscan/evaluation/DatasetEvaluator.kt b/evaluation/src/main/java/org/fairscan/evaluation/DatasetEvaluator.kt index 5f2f42c..bff721a 100644 --- a/evaluation/src/main/java/org/fairscan/evaluation/DatasetEvaluator.kt +++ b/evaluation/src/main/java/org/fairscan/evaluation/DatasetEvaluator.kt @@ -71,7 +71,7 @@ object DatasetEvaluator { ?.scaledTo(mask.width, mask.height, inputMat.width(), inputMat.height()) val corrected: Mat? = if (quad != null) { - extractDocument(inputMat, quad = quad, rotationDegrees = 0, mask) + extractDocument(inputMat, quad = quad, rotationDegrees = 0, mask).image } else null val inputOut = File(outputDir, "${e.name}_input.jpg") diff --git a/imageprocessing/src/main/java/org/fairscan/imageprocessing/DocumentDetection.kt b/imageprocessing/src/main/java/org/fairscan/imageprocessing/DocumentDetection.kt index 996eb02..83f6421 100644 --- a/imageprocessing/src/main/java/org/fairscan/imageprocessing/DocumentDetection.kt +++ b/imageprocessing/src/main/java/org/fairscan/imageprocessing/DocumentDetection.kt @@ -32,6 +32,15 @@ interface Mask { fun toMat(): Mat } +data class PageAnalysis( + val isColored: Boolean, +) + +data class ExtractedDocument( + val image: Mat, + val pageAnalysis: PageAnalysis, +) + fun detectDocumentQuad(mask: Mask, isLiveAnalysis: Boolean, minQuadAreaRatio: Double = 0.02): Quad? { val mat = mask.toMat() val (biggest: MatOfPoint2f?, area) = biggestContour(mat) @@ -117,7 +126,7 @@ fun extractDocument( quad: Quad, rotationDegrees: Int, mask: Mask, -): Mat { +): ExtractedDocument { val widthTop = norm(quad.topLeft, quad.topRight) val widthBottom = norm(quad.bottomLeft, quad.bottomRight) val targetWidth = (widthTop + widthBottom) / 2 @@ -134,14 +143,14 @@ fun extractDocument( ) val dstPoints = MatOfPoint2f( org.opencv.core.Point(0.0, 0.0), - org.opencv.core.Point(targetWidth.toDouble(), 0.0), - org.opencv.core.Point(targetWidth.toDouble(), targetHeight.toDouble()), - org.opencv.core.Point(0.0, targetHeight.toDouble()) + org.opencv.core.Point(targetWidth, 0.0), + org.opencv.core.Point(targetWidth, targetHeight), + org.opencv.core.Point(0.0, targetHeight) ) val transform = Imgproc.getPerspectiveTransform(srcPoints, dstPoints) val outputMat = Mat() - val outputSize = Size(targetWidth.toDouble(), targetHeight.toDouble()) + val outputSize = Size(targetWidth, targetHeight) Imgproc.warpPerspective(inputMat, outputMat, transform, outputSize) val resized = resize(outputMat, 1500.0) @@ -149,7 +158,7 @@ fun extractDocument( val enhanced = enhanceCapturedImage(resized, isColored) val rotated = rotate(enhanced, rotationDegrees) - return rotated + return ExtractedDocument(rotated, PageAnalysis(isColored)) } fun resize(original: Mat, targetMax: Double): Mat {