diff --git a/.idea/.gitignore b/.idea/.gitignore deleted file mode 100644 index 26d3352..0000000 --- a/.idea/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -# Default ignored files -/shelf/ -/workspace.xml diff --git a/.idea/AndroidProjectSystem.xml b/.idea/AndroidProjectSystem.xml deleted file mode 100644 index 4a53bee..0000000 --- a/.idea/AndroidProjectSystem.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - \ No newline at end of file diff --git a/.idea/compiler.xml b/.idea/compiler.xml deleted file mode 100644 index fb7f4a8..0000000 --- a/.idea/compiler.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/.idea/gradle.xml b/.idea/gradle.xml deleted file mode 100644 index f64ba30..0000000 --- a/.idea/gradle.xml +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - - \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml deleted file mode 100644 index 7061a0d..0000000 --- a/.idea/inspectionProfiles/Project_Default.xml +++ /dev/null @@ -1,61 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/migrations.xml b/.idea/migrations.xml deleted file mode 100644 index f8051a6..0000000 --- a/.idea/migrations.xml +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml deleted file mode 100644 index d852bbf..0000000 --- a/.idea/misc.xml +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/.idea/runConfigurations.xml b/.idea/runConfigurations.xml deleted file mode 100644 index 16660f1..0000000 --- a/.idea/runConfigurations.xml +++ /dev/null @@ -1,17 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index 94a25f7..0000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/app/build.gradle.kts b/app/build.gradle.kts index 322eb6d..aef7a05 100644 --- a/app/build.gradle.kts 
+++ b/app/build.gradle.kts @@ -136,6 +136,7 @@ dependencies { implementation(libs.reorderable) implementation(libs.aboutlibraries.compose.m3) implementation(libs.kotlinx.serialization.json) +implementation(libs.okhttp) testImplementation(libs.junit) testImplementation(libs.assertj) diff --git a/app/src/main/AndroidManifest.xml b/app/src/main/AndroidManifest.xml index 6ed47b3..a26f9c4 100644 --- a/app/src/main/AndroidManifest.xml +++ b/app/src/main/AndroidManifest.xml @@ -8,9 +8,10 @@ - - - + + + + viewModelFactory( crossinline create: (AppContainer) -> VM diff --git a/app/src/main/java/org/fairscan/app/MainActivity.kt b/app/src/main/java/org/fairscan/app/MainActivity.kt index b59c024..087e293 100644 --- a/app/src/main/java/org/fairscan/app/MainActivity.kt +++ b/app/src/main/java/org/fairscan/app/MainActivity.kt @@ -126,6 +126,7 @@ class MainActivity : ComponentActivity() { val documentUiState by viewModel.documentUiState.collectAsStateWithLifecycle() val cropInitialState by viewModel.cropInitState.collectAsStateWithLifecycle() val exportUiState by exportViewModel.uiState.collectAsStateWithLifecycle() + val taskPanelState by exportViewModel.taskPanelState.collectAsStateWithLifecycle() val cameraPermission = rememberCameraPermissionState() CollectCameraEvents(cameraViewModel, viewModel) CollectExportEvents(context, exportViewModel) @@ -211,7 +212,14 @@ class MainActivity : ComponentActivity() { share = { exportViewModel.onShareClicked() }, save = { exportViewModel.onSaveClicked() }, open = { item -> openUri(item.uri, item.format.mimeType, logger) }, + uploadToPc = { exportViewModel.uploadPdfToServer() }, + uploadAndProcess = { processType -> exportViewModel.uploadAndProcess(processType) }, + downloadResult = { task, destDirUri, context -> + exportViewModel.downloadResult(task, destDirUri, context) + }, + resetDownloadState = { exportViewModel.resetDownloadState() }, ), + taskPanelState = taskPanelState, onCloseScan = { exportViewModel.resetFilename() 
viewModel.startNewDocument() @@ -295,6 +303,14 @@ class MainActivity : ComponentActivity() { onResetExportDirClick = { settingsViewModel.setExportDirUri(null) }, onExportFormatChanged = { format -> settingsViewModel.setExportFormat(format) }, onExportQualityChanged = { quality -> settingsViewModel.setExportQuality(quality) }, + onServerHostChanged = { host -> settingsViewModel.setServerHost(host) }, + onServerPortChanged = { port -> settingsViewModel.setServerPort(port) }, + onStreamQualityChanged = { quality -> settingsViewModel.setStreamQuality(quality) }, + onPostProcessModeChanged = { mode -> settingsViewModel.setPostProcessMode(mode) }, + onAutoDownloadChanged = { enabled -> settingsViewModel.setAutoDownloadProcessedResult(enabled) }, + onStreamFrameRateChanged = { rate -> settingsViewModel.setStreamFrameRate(rate) }, + onScanNetworkHostsClick = { /* TODO: Implement network discovery */ }, + onTestConnectionClick = { /* TODO: Implement connection test */ }, onBack = nav.back, ) } diff --git a/app/src/main/java/org/fairscan/app/network/NetworkInfoProvider.kt b/app/src/main/java/org/fairscan/app/network/NetworkInfoProvider.kt new file mode 100644 index 0000000..44188fd --- /dev/null +++ b/app/src/main/java/org/fairscan/app/network/NetworkInfoProvider.kt @@ -0,0 +1,34 @@ +/* + * Copyright 2025-2026 The FairScan authors + * + * This program is free software: you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation, either version 3 of the License, or (at your option) + * any later version. + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
+ */
+package org.fairscan.app.network
+
+import java.net.Inet4Address
+import java.net.NetworkInterface
+
+/**
+ * Looks up the IPv4 address this device exposes on its network interfaces.
+ */
+class NetworkInfoProvider {
+
+    /**
+     * Returns the textual form of the first non-loopback IPv4 address found on
+     * a network interface that is up, or null when there is none or the
+     * interfaces cannot be queried.
+     */
+    fun getLocalIpAddress(): String? {
+        return try {
+            // The enumeration may be null on platforms that report no interfaces.
+            NetworkInterface.getNetworkInterfaces()?.asSequence().orEmpty()
+                // An address bound to an interface that is down is not usable.
+                .filter { it.isUp }
+                .flatMap { it.inetAddresses.asSequence() }
+                .filterNot { it.isLoopbackAddress }
+                .filterIsInstance<Inet4Address>()
+                .firstOrNull()
+                ?.hostAddress
+        } catch (e: Exception) {
+            // Best effort: any lookup failure is reported as "no address".
+            null
+        }
+    }
+}
diff --git a/app/src/main/java/org/fairscan/app/network/ServerEndpoint.kt b/app/src/main/java/org/fairscan/app/network/ServerEndpoint.kt
new file mode 100644
index 0000000..838cac2
--- /dev/null
+++ b/app/src/main/java/org/fairscan/app/network/ServerEndpoint.kt
@@ -0,0 +1,24 @@
+/*
+ * Copyright 2025-2026 The FairScan authors
+ *
+ * This program is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+package org.fairscan.app.network
+
+/**
+ * Host, port and protocol of a server, with the base URLs derived from them.
+ *
+ * @property host Host name or IP address.
+ * @property port TCP port.
+ * @property protocol URL scheme used for [url]; defaults to "http".
+ */
+data class ServerEndpoint(
+    val host: String,
+    val port: Int,
+    val protocol: String = "http",
+) {
+    /** Base URL for plain requests, built as `protocol://host:port`. */
+    val url: String get() = "$protocol://$host:$port"
+
+    /**
+     * Base URL for WebSocket connections. Uses the secure `wss` scheme when
+     * [protocol] is "https" and `ws` otherwise, so the socket matches the
+     * transport security of [url].
+     */
+    val wsUrl: String
+        get() {
+            val scheme = if (protocol.equals("https", ignoreCase = true)) "wss" else "ws"
+            return "$scheme://$host:$port"
+        }
+}
diff --git a/app/src/main/java/org/fairscan/app/network/discovery/DiscoveredHost.kt b/app/src/main/java/org/fairscan/app/network/discovery/DiscoveredHost.kt
new file mode 100644
index 0000000..c5b0485
--- /dev/null
+++ b/app/src/main/java/org/fairscan/app/network/discovery/DiscoveredHost.kt
@@ -0,0 +1,24 @@
+/*
+ * Copyright 2025-2026 The FairScan authors
+ *
+ * This program is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+package org.fairscan.app.network.discovery
+
+/** A service instance found on the local network. */
+data class DiscoveredHost(
+    val serviceName: String,
+    val displayName: String,
+    val host: String,
+    val port: Int,
+    // NOTE(review): the element type was lost in this extract; presumably
+    // List<String> - confirm against the original file.
+    val features: List<String> = emptyList(),
+    val version: String? = null,
+)
diff --git a/app/src/main/java/org/fairscan/app/network/discovery/DiscoveryState.kt b/app/src/main/java/org/fairscan/app/network/discovery/DiscoveryState.kt
new file mode 100644
index 0000000..2955ca1
--- /dev/null
+++ b/app/src/main/java/org/fairscan/app/network/discovery/DiscoveryState.kt
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2025-2026 The FairScan authors
+ *
+ * This program is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+package org.fairscan.app.network.discovery
+
+/** Progress and outcome of a discovery run. */
+sealed class DiscoveryState {
+    data object Idle : DiscoveryState()
+    data object Discovering : DiscoveryState()
+    // NOTE(review): the element type was lost in this extract; presumably
+    // List<DiscoveredHost> - confirm against the original file.
+    data class Success(val hosts: List<DiscoveredHost>) : DiscoveryState()
+    data object Empty : DiscoveryState()
+    data class Error(val message: String) : DiscoveryState()
+}
diff --git a/app/src/main/java/org/fairscan/app/network/discovery/LanServiceDiscovery.kt b/app/src/main/java/org/fairscan/app/network/discovery/LanServiceDiscovery.kt
new file mode 100644
index 0000000..bccaeca
--- /dev/null
+++ b/app/src/main/java/org/fairscan/app/network/discovery/LanServiceDiscovery.kt
@@ -0,0 +1,22 @@
+/*
+ * Copyright 2025-2026 The FairScan authors
+ *
+ * This program is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+package org.fairscan.app.network.discovery
+
+import kotlinx.coroutines.flow.Flow
+
+/** Starts and stops discovery of a given service type. */
+interface LanServiceDiscovery {
+    // NOTE(review): the Flow type argument was lost in this extract; presumably
+    // Flow<DiscoveryState> - confirm against the original file.
+    suspend fun startDiscovery(serviceType: String): Flow<DiscoveryState>
+    suspend fun stopDiscovery()
+}
diff --git a/app/src/main/java/org/fairscan/app/network/stream/FrameCompressor.kt b/app/src/main/java/org/fairscan/app/network/stream/FrameCompressor.kt
new file mode 100644
index 0000000..e25a069
--- /dev/null
+++ b/app/src/main/java/org/fairscan/app/network/stream/FrameCompressor.kt
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2025-2026 The FairScan authors
+ *
+ * This program is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+package org.fairscan.app.network.stream
+
+import android.graphics.Bitmap
+import android.graphics.BitmapFactory
+import java.io.ByteArrayOutputStream
+
+class FrameCompressor {
+
+    /**
+     * Compress a bitmap to JPEG bytes with resize and quality settings.
+     *
+     * @param source Bitmap to encode. It is never recycled by this method.
+     * @param maxDimension Upper bound, in pixels, for the longer side of the
+     *   encoded image; larger bitmaps are scaled down keeping their aspect ratio.
+     * @param jpegQuality Quality value passed to [Bitmap.compress].
+     * @return The JPEG bytes, or null if resizing or compression fails.
+     */
+    fun compress(
+        source: Bitmap,
+        maxDimension: Int,
+        jpegQuality: Int,
+    ): ByteArray? {
+        return try {
+            val resized = resizeIfNeeded(source, maxDimension)
+            try {
+                val output = ByteArrayOutputStream()
+                // compress() reports an encoding failure through its return
+                // value, so a false result must not be returned as valid bytes.
+                val encoded = resized.compress(Bitmap.CompressFormat.JPEG, jpegQuality, output)
+                if (encoded) output.toByteArray() else null
+            } finally {
+                // Release the temporary scaled copy; the caller still owns source.
+                if (resized !== source) resized.recycle()
+            }
+        } catch (e: Exception) {
+            null
+        }
+    }
+
+    /**
+     * Returns [bitmap] itself when its longer side already fits within
+     * [maxDimension], otherwise a scaled copy with the same aspect ratio.
+     */
+    private fun resizeIfNeeded(bitmap: Bitmap, maxDimension: Int): Bitmap {
+        require(maxDimension > 0) { "maxDimension must be positive: $maxDimension" }
+        val width = bitmap.width
+        val height = bitmap.height
+        val max = maxOf(width, height)
+        if (max <= maxDimension) return bitmap
+
+        val ratio = maxDimension.toFloat() / max
+        // Very elongated bitmaps would round the short side down to 0, which
+        // createScaledBitmap rejects; keep at least one pixel per side.
+        val newWidth = (width * ratio).toInt().coerceAtLeast(1)
+        val newHeight = (height * ratio).toInt().coerceAtLeast(1)
+        return Bitmap.createScaledBitmap(bitmap, newWidth, newHeight, true)
+    }
+}
diff --git a/app/src/main/java/org/fairscan/app/network/stream/FrameDropController.kt b/app/src/main/java/org/fairscan/app/network/stream/FrameDropController.kt
new file mode 100644
index 0000000..3ce6966
--- /dev/null
+++ b/app/src/main/java/org/fairscan/app/network/stream/FrameDropController.kt
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2025-2026 The FairScan authors
+ *
+ * This program is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+package org.fairscan.app.network.stream
+
+/**
+ * Controls frame dropping based on minimum interval between sends.
+ * If a frame arrives before the minimum interval has elapsed, it is dropped.
+ */
+class FrameDropController {
+    // Time of the last sent frame on the System.nanoTime() clock, or NEVER_SENT.
+    // nanoTime is monotonic, so adjusting the device's wall clock can neither
+    // stall the stream nor let a burst of frames through.
+    @Volatile
+    private var lastSendNanos: Long = NEVER_SENT
+
+    private val isSending = java.util.concurrent.atomic.AtomicBoolean(false)
+
+    /**
+     * Returns true if this frame should be dropped.
+     * @param minIntervalMs Minimum interval between frames in ms.
+     * If <= 0, no time-based dropping (only isSending guard).
+     */
+    fun shouldDrop(minIntervalMs: Long): Boolean {
+        if (isSending.get()) return true
+        // Unlimited mode: no time-based dropping
+        if (minIntervalMs <= 0) return false
+        val last = lastSendNanos
+        // Nothing has been sent since construction or reset(): let it through.
+        if (last == NEVER_SENT) return false
+        val elapsedMs = (System.nanoTime() - last) / NANOS_PER_MILLI
+        return elapsedMs < minIntervalMs
+    }
+
+    /** Records the current time as the moment the last frame was sent. */
+    fun onFrameSent() {
+        lastSendNanos = System.nanoTime()
+    }
+
+    /** Sets the in-flight flag; while it is true every frame is dropped. */
+    fun markSending(value: Boolean) {
+        isSending.set(value)
+    }
+
+    /** Forgets the last send time and clears the in-flight flag. */
+    fun reset() {
+        lastSendNanos = NEVER_SENT
+        isSending.set(false)
+    }
+
+    private companion object {
+        const val NEVER_SENT = Long.MIN_VALUE
+        const val NANOS_PER_MILLI = 1_000_000L
+    }
+}
diff --git a/app/src/main/java/org/fairscan/app/network/stream/OkHttpStreamClient.kt b/app/src/main/java/org/fairscan/app/network/stream/OkHttpStreamClient.kt
new file mode 100644
index 0000000..aca91b2
--- /dev/null
+++ b/app/src/main/java/org/fairscan/app/network/stream/OkHttpStreamClient.kt
@@ -0,0 +1,86 @@
+/*
+ * Copyright 2025-2026 The FairScan authors
+ *
+ * This program is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+package org.fairscan.app.network.stream
+
+import kotlinx.coroutines.flow.MutableStateFlow
+import kotlinx.coroutines.flow.StateFlow
+import kotlinx.coroutines.flow.asStateFlow
+import okhttp3.OkHttpClient
+import okio.ByteString.Companion.toByteString
+import okhttp3.Request
+import okhttp3.Response
+import okhttp3.WebSocket
+import okhttp3.WebSocketListener
+import org.fairscan.app.network.ServerEndpoint
+import java.util.concurrent.TimeUnit
+
+/**
+ * [StreamClient] that sends frames as binary messages over an OkHttp WebSocket
+ * opened on the endpoint's `/stream` path.
+ */
+class OkHttpStreamClient(
+    private val okHttpClient: OkHttpClient = OkHttpClient.Builder()
+        .connectTimeout(5, TimeUnit.SECONDS)
+        .readTimeout(0, TimeUnit.SECONDS) // No read timeout for streaming
+        .writeTimeout(0, TimeUnit.SECONDS) // No write timeout for streaming
+        .build(),
+) : StreamClient {
+
+    private val _state = MutableStateFlow<StreamState>(StreamState.Disconnected)
+    override val state: StateFlow<StreamState> = _state.asStateFlow()
+
+    // Written by connect()/disconnect() and read by sendFrame(), which may run
+    // on a different thread.
+    @Volatile
+    private var webSocket: WebSocket? = null
+
+    // Identity of the connection attempt whose listener may still update state.
+    // Replaced on every connect() and cleared on disconnect(), so late callbacks
+    // from a superseded socket are ignored instead of overwriting the state.
+    @Volatile
+    private var activeAttempt: Any? = null
+
+    /**
+     * Opens the WebSocket to [endpoint]. Does nothing while a connection is
+     * already open or being opened. The outcome is reported through [state].
+     */
+    override suspend fun connect(endpoint: ServerEndpoint) {
+        if (_state.value is StreamState.Connected || _state.value is StreamState.Connecting) return
+
+        // A socket left over from a failed or remotely closed attempt is of no
+        // further use; release its resources before replacing it.
+        webSocket?.cancel()
+
+        val attempt = Any()
+        activeAttempt = attempt
+        _state.value = StreamState.Connecting
+        val request = Request.Builder()
+            .url("${endpoint.wsUrl}/stream")
+            .build()
+
+        webSocket = okHttpClient.newWebSocket(request, object : WebSocketListener() {
+            override fun onOpen(webSocket: WebSocket, response: Response) {
+                if (activeAttempt !== attempt) return
+                _state.value = StreamState.Connected
+            }
+
+            override fun onFailure(webSocket: WebSocket, t: Throwable, response: Response?) {
+                if (activeAttempt !== attempt) return
+                _state.value = StreamState.Error(t.message ?: "Connection failed")
+            }
+
+            override fun onClosed(webSocket: WebSocket, code: Int, reason: String) {
+                if (activeAttempt !== attempt) return
+                _state.value = StreamState.Disconnected
+            }
+
+            override fun onClosing(webSocket: WebSocket, code: Int, reason: String) {
+                // Complete the closing handshake started by the server.
+                webSocket.close(1000, "Client closing")
+            }
+        })
+    }
+
+    /**
+     * Enqueues [frameData] as one binary message.
+     *
+     * @return false when there is no socket or the socket rejected the message.
+     */
+    override fun sendFrame(frameData: ByteArray): Boolean {
+        val ws = webSocket ?: return false
+        return ws.send(frameData.toByteString())
+    }
+
+    /** Closes the socket, if any, and reports [StreamState.Disconnected]. */
+    override suspend fun disconnect() {
+        // Invalidate the listener first so its close callbacks cannot race
+        // with a connect() issued right after this call.
+        activeAttempt = null
+        webSocket?.close(1000, "Client disconnect")
+        webSocket = null
+        _state.value = StreamState.Disconnected
+    }
+}
+
+/** Connection used to push encoded frames to a server. */
+interface StreamClient {
+    val state: StateFlow<StreamState>
+    suspend fun connect(endpoint: ServerEndpoint)
+    fun sendFrame(frameData: ByteArray): Boolean
+    suspend fun disconnect()
+}
diff --git a/app/src/main/java/org/fairscan/app/network/stream/StreamQualityPreset.kt b/app/src/main/java/org/fairscan/app/network/stream/StreamQualityPreset.kt
new file mode 100644
index 0000000..218ec6c
--- /dev/null
+++ b/app/src/main/java/org/fairscan/app/network/stream/StreamQualityPreset.kt
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2025-2026 The FairScan authors
+ *
+ * This program is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+package org.fairscan.app.network.stream
+
+import org.fairscan.app.ui.screens.settings.StreamQuality
+
+/**
+ * Encoding parameters for one streaming quality level.
+ *
+ * @property label Display name of the preset.
+ * @property maxResolution Resolution limit in pixels.
+ * @property jpegQuality JPEG quality value.
+ * @property targetFps Desired frames per second.
+ */
+data class StreamQualityPreset(
+    val label: String,
+    val maxResolution: Int,
+    val jpegQuality: Int,
+    val targetFps: Int,
+) {
+    /**
+     * Minimum delay between two frames in milliseconds, never below 50 ms.
+     * A non-positive [targetFps] is treated as 1 fps so that this getter
+     * cannot fail with a division by zero.
+     */
+    val minIntervalMs: Long
+        get() = (1000L / targetFps.coerceAtLeast(1)).coerceAtLeast(50L)
+}
+
+/** Maps each [StreamQuality] setting to its fixed preset. */
+fun StreamQuality.toPreset(): StreamQualityPreset = when (this) {
+    StreamQuality.LOW -> StreamQualityPreset("Low", 640, 45, 10)
+    StreamQuality.BALANCED -> StreamQualityPreset("Balanced", 960, 60, 8)
+    StreamQuality.HIGH -> StreamQualityPreset("High", 1280, 75, 6)
+}
diff --git a/app/src/main/java/org/fairscan/app/network/stream/StreamState.kt b/app/src/main/java/org/fairscan/app/network/stream/StreamState.kt
new file mode 100644
index 0000000..73a2a2d
--- /dev/null
+++ b/app/src/main/java/org/fairscan/app/network/stream/StreamState.kt
@@ -0,0 +1,22 @@
+/*
+ * Copyright 2025-2026 The FairScan authors
+ *
+ * This program is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+package org.fairscan.app.network.stream
+
+sealed class StreamState {
+    data object Disconnected : StreamState()
+    data object Connecting : StreamState()
+    data object Connected : StreamState()
+    data class Error(val message: String) : StreamState()
+}
diff --git a/app/src/main/java/org/fairscan/app/network/tasks/TaskClient.kt b/app/src/main/java/org/fairscan/app/network/tasks/TaskClient.kt
new file mode 100644
index 0000000..de31e96
--- /dev/null
+++ b/app/src/main/java/org/fairscan/app/network/tasks/TaskClient.kt
@@ -0,0 +1,188 @@
+/*
+ * Copyright 2025-2026 The FairScan authors
+ *
+ * This program is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+package org.fairscan.app.network.tasks
+
+import okhttp3.MediaType.Companion.toMediaType
+import okhttp3.OkHttpClient
+import okhttp3.Request
+import okhttp3.RequestBody.Companion.toRequestBody
+import org.fairscan.app.network.ServerEndpoint
+import org.fairscan.app.network.upload.PdfUploadClient
+import java.io.File
+import java.io.FileOutputStream
+import java.io.IOException
+import java.util.concurrent.TimeUnit
+
+/**
+ * Client for task management operations on the FairScan PC server.
+ *
+ * All calls are blocking and throw [IOException] on network failure, on an
+ * empty response or on a non-2xx status.
+ */
+class TaskClient(
+    private val okHttpClient: OkHttpClient,
+) {
+    private val downloadClient = okHttpClient.newBuilder()
+        .connectTimeout(10, TimeUnit.SECONDS)
+        .readTimeout(120, TimeUnit.SECONDS)
+        .build()
+
+    /**
+     * Create a processing task for an uploaded PDF.
+     *
+     * @param endpoint Server endpoint.
+     * @param fileId Identifier of the previously uploaded PDF.
+     * @param processType Processing type sent to the server; defaults to "ocrpdf".
+     * @return ProcessTaskResult with the task ID and status found in the
+     *   response (an empty ID and "unknown" status when the response omits them).
+     * @throws IOException if the request fails or the server rejects it.
+     */
+    fun processPdf(
+        endpoint: ServerEndpoint,
+        fileId: String,
+        processType: String = "ocrpdf",
+    ): ProcessTaskResult {
+        val url = "${endpoint.url}/tasks/process"
+        // Both values are escaped so that a quote or backslash in them cannot
+        // break out of the JSON string and alter the request.
+        val json = """{"fileId":${jsonQuote(fileId)},"processType":${jsonQuote(processType)}}"""
+        val requestBody = json.toRequestBody("application/json".toMediaType())
+
+        val request = Request.Builder()
+            .url(url)
+            .post(requestBody)
+            .build()
+
+        val body = fetchBody(request, "Failed to create task")
+
+        val taskId = PdfUploadClient.extractJsonString(body, "taskId") ?: ""
+        val status = PdfUploadClient.extractJsonString(body, "status") ?: "unknown"
+        return ProcessTaskResult(taskId, status, "")
+    }
+
+    /**
+     * Get the current status of a processing task.
+     *
+     * Fields missing from the response fall back to [taskId], "unknown", 0 or
+     * an empty string respectively.
+     *
+     * @throws IOException if the request fails or the server rejects it.
+     */
+    fun getTaskStatus(endpoint: ServerEndpoint, taskId: String): TaskStatus {
+        val url = "${endpoint.url}/tasks/$taskId"
+        val request = Request.Builder().url(url).get().build()
+        val body = fetchBody(request, "Failed to get task status")
+
+        return TaskStatus(
+            taskId = PdfUploadClient.extractJsonString(body, "taskId") ?: taskId,
+            status = PdfUploadClient.extractJsonString(body, "status") ?: "unknown",
+            progress = extractJsonInt(body, "progress") ?: 0,
+            fileName = PdfUploadClient.extractJsonString(body, "fileName") ?: "",
+            createdAt = PdfUploadClient.extractJsonString(body, "createdAt") ?: "",
+            message = PdfUploadClient.extractJsonString(body, "message") ?: "",
+        )
+    }
+
+    /**
+     * List artifacts (result files) for a completed task.
+     *
+     * @throws IOException if the request fails or the server rejects it.
+     */
+    fun listArtifacts(endpoint: ServerEndpoint, taskId: String): List<ArtifactInfo> {
+        val url = "${endpoint.url}/tasks/$taskId/artifacts"
+        val request = Request.Builder().url(url).get().build()
+        return parseArtifactList(fetchBody(request, "Failed to list artifacts"))
+    }
+
+    /**
+     * Download an artifact to a destination file.
+     *
+     * @param onProgress Called with the downloaded fraction (0.0..1.0) after
+     *   each chunk, but only when the server announces a content length.
+     * @return The destination file (same as [destFile]).
+     * @throws IOException if the request fails, the server rejects it or the
+     *   file cannot be written.
+     */
+    fun downloadArtifact(
+        endpoint: ServerEndpoint,
+        artifactId: String,
+        destFile: File,
+        onProgress: ((Float) -> Unit)? = null,
+    ): File {
+        val url = "${endpoint.url}/artifacts/$artifactId/download"
+        val request = Request.Builder().url(url).get().build()
+
+        // use {} closes the response on every path, including the error
+        // branches, so the connection is not leaked.
+        downloadClient.newCall(request).execute().use { response ->
+            if (!response.isSuccessful) {
+                throw IOException("Failed to download artifact (${response.code})")
+            }
+
+            val body = response.body ?: throw IOException("Empty response body")
+
+            destFile.parentFile?.mkdirs()
+            val total = body.contentLength()
+            var bytesRead = 0L
+
+            body.byteStream().use { input ->
+                FileOutputStream(destFile).use { output ->
+                    val buffer = ByteArray(8192)
+                    var read: Int
+                    while (input.read(buffer).also { read = it } != -1) {
+                        output.write(buffer, 0, read)
+                        bytesRead += read
+                        if (onProgress != null && total > 0) {
+                            onProgress(bytesRead.toFloat() / total)
+                        }
+                    }
+                }
+            }
+        }
+
+        return destFile
+    }
+
+    /**
+     * Executes [request] and returns the response body as text, closing the
+     * response in every case.
+     *
+     * @param failure Prefix of the error message used for a non-2xx status.
+     * @throws IOException on an empty response or a non-2xx status.
+     */
+    private fun fetchBody(request: Request, failure: String): String =
+        okHttpClient.newCall(request).execute().use { response ->
+            val body = response.body?.string() ?: throw IOException("Empty response")
+            if (!response.isSuccessful) {
+                throw IOException("$failure (${response.code}): $body")
+            }
+            body
+        }
+
+    /** Renders [value] as a JSON string literal with the mandatory escapes. */
+    private fun jsonQuote(value: String): String = buildString(value.length + 2) {
+        append('"')
+        for (c in value) {
+            when {
+                c == '"' -> append("\\\"")
+                c == '\\' -> append("\\\\")
+                c < ' ' -> append("\\u%04x".format(c.code))
+                else -> append(c)
+            }
+        }
+        append('"')
+    }
+
+    /**
+     * Extracts artifacts from the first JSON array in [json]. Objects are
+     * delimited by the next closing brace, so nested objects are not
+     * supported. Entries with neither "id" nor "artifactId" are skipped.
+     */
+    private fun parseArtifactList(json: String): List<ArtifactInfo> {
+        val artifacts = mutableListOf<ArtifactInfo>()
+        var pos = json.indexOf('[')
+        if (pos < 0) return artifacts
+
+        pos = json.indexOf('{', pos)
+        while (pos >= 0) {
+            val end = json.indexOf('}', pos)
+            if (end < 0) break
+            val obj = json.substring(pos, end + 1)
+            val id = PdfUploadClient.extractJsonString(obj, "id")
+                ?: PdfUploadClient.extractJsonString(obj, "artifactId")
+            val fileName = PdfUploadClient.extractJsonString(obj, "fileName") ?: ""
+            val fileSize = extractJsonLong(obj, "fileSize") ?: 0L
+            val fileType = PdfUploadClient.extractJsonString(obj, "fileType") ?: ""
+            if (id != null) {
+                artifacts.add(ArtifactInfo(id, fileName, fileSize, fileType))
+            }
+            pos = json.indexOf('{', end + 1)
+        }
+        return artifacts
+    }
+
+    companion object {
+        /**
+         * Returns the first unsigned integer stored under [key] in [json], or
+         * null when the key is absent or the value does not fit an Int.
+         */
+        fun extractJsonInt(json: String, key: String): Int? {
+            // The key is quoted so regex metacharacters in it match literally.
+            val pattern = "\"${Regex.escape(key)}\"\\s*:\\s*(\\d+)".toRegex()
+            return pattern.find(json)?.groupValues?.getOrNull(1)?.toIntOrNull()
+        }
+
+        /**
+         * Returns the first unsigned integer stored under [key] in [json], or
+         * null when the key is absent or the value does not fit a Long.
+         */
+        fun extractJsonLong(json: String, key: String): Long? {
+            val pattern = "\"${Regex.escape(key)}\"\\s*:\\s*(\\d+)".toRegex()
+            return pattern.find(json)?.groupValues?.getOrNull(1)?.toLongOrNull()
+        }
+    }
+}
diff --git a/app/src/main/java/org/fairscan/app/network/tasks/TaskModels.kt b/app/src/main/java/org/fairscan/app/network/tasks/TaskModels.kt
new file mode 100644
index 0000000..4977f84
--- /dev/null
+++ b/app/src/main/java/org/fairscan/app/network/tasks/TaskModels.kt
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2025-2026 The FairScan authors
+ *
+ * This program is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+package org.fairscan.app.network.tasks
+
+/**
+ * Status of a processing task on the PC server.
+ */
+data class TaskStatus(
+    val taskId: String,
+    val status: String, // queued, processing, completed, failed
+    val progress: Int = 0, // 0-100
+    val fileName: String = "",
+    val createdAt: String = "",
+    val message: String = "",
+)
+
+/**
+ * Information about a processed artifact (result file) on the PC server.
+ */
+data class ArtifactInfo(
+    val artifactId: String,
+    val fileName: String,
+    val fileSize: Long = 0,
+    val fileType: String = "", // "pdf", "markdown", "md", etc.
+)
+
+/**
+ * Result of creating a processing task.
+ */
+data class ProcessTaskResult(
+    val taskId: String,
+    val status: String,
+    val message: String = "",
+)
diff --git a/app/src/main/java/org/fairscan/app/network/upload/PdfUploadClient.kt b/app/src/main/java/org/fairscan/app/network/upload/PdfUploadClient.kt
new file mode 100644
index 0000000..06ba895
--- /dev/null
+++ b/app/src/main/java/org/fairscan/app/network/upload/PdfUploadClient.kt
@@ -0,0 +1,124 @@
+/*
+ * Copyright 2025-2026 The FairScan authors
+ *
+ * This program is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+package org.fairscan.app.network.upload
+
+import okhttp3.MediaType.Companion.toMediaType
+import okhttp3.MultipartBody
+import okhttp3.OkHttpClient
+import okhttp3.Request
+import okhttp3.RequestBody
+import org.fairscan.app.network.ServerEndpoint
+import java.io.File
+import java.io.IOException
+import java.util.concurrent.TimeUnit
+
+/**
+ * Result of a PDF upload operation.
+ */
+data class UploadResult(
+    val fileId: String,
+    val fileName: String = "",
+    val sizeBytes: Long = 0,
+)
+
+/**
+ * Client for uploading PDF files to the FairScan PC server.
+ */
+class PdfUploadClient(
+    private val okHttpClient: OkHttpClient,
+) {
+    private val uploadTimeoutClient = okHttpClient.newBuilder()
+        .connectTimeout(10, TimeUnit.SECONDS)
+        .readTimeout(60, TimeUnit.SECONDS)
+        .writeTimeout(120, TimeUnit.SECONDS) // Large files need time to upload
+        .build()
+
+    /**
+     * Upload a PDF file to the PC server as a multipart form part named "file".
+     *
+     * @param endpoint The server endpoint to upload to.
+     * @param file The PDF file to upload.
+     * @param onProgress Callback with progress 0.0..1.0 (approximate, based on bytes written).
+     * @return UploadResult with the file ID, file name and size found in the
+     *   server response (empty / 0 for fields the response omits).
+     * @throws IOException on network or server error.
+     */
+    fun uploadPdf(
+        endpoint: ServerEndpoint,
+        file: File,
+        onProgress: ((Float) -> Unit)? = null,
+    ): UploadResult {
+        val url = "${endpoint.url}/upload/pdf"
+
+        // Streams the file in chunks so that progress can be reported while
+        // the body is being written.
+        val fileBody = object : RequestBody() {
+            override fun contentType() = "application/pdf".toMediaType()
+            override fun contentLength() = file.length()
+
+            override fun writeTo(sink: okio.BufferedSink) {
+                val buffer = ByteArray(8192)
+                val total = file.length()
+                var written = 0L
+                file.inputStream().use { input ->
+                    var bytesRead: Int
+                    while (input.read(buffer).also { bytesRead = it } != -1) {
+                        sink.write(buffer, 0, bytesRead)
+                        written += bytesRead
+                        if (onProgress != null && total > 0) {
+                            onProgress(written.toFloat() / total)
+                        }
+                    }
+                }
+            }
+        }
+
+        val requestBody = MultipartBody.Builder()
+            .setType(MultipartBody.FORM)
+            .addFormDataPart("file", file.name, fileBody)
+            .build()
+
+        val request = Request.Builder()
+            .url(url)
+            .post(requestBody)
+            .build()
+
+        // use {} closes the response on every path, including the error branches.
+        return uploadTimeoutClient.newCall(request).execute().use { response ->
+            val responseBody = response.body?.string() ?: throw IOException("Empty response from server")
+
+            if (!response.isSuccessful) {
+                throw IOException("Upload failed (${response.code}): $responseBody")
+            }
+
+            // Parse JSON response with the regex-based helpers below
+            parseUploadResponse(responseBody)
+        }
+    }
+
+    /** Builds an [UploadResult] from the fields found in [json]. */
+    private fun parseUploadResponse(json: String): UploadResult {
+        val fileId = extractJsonString(json, "fileId") ?: ""
+        val fileName = extractJsonString(json, "fileName") ?: ""
+        val sizeBytes = extractJsonLong(json, "sizeBytes") ?: 0L
+        return UploadResult(fileId, fileName, sizeBytes)
+    }
+
+    companion object {
+        /**
+         * Returns the decoded string value of the first occurrence of [key] in
+         * [json], at any nesting depth, or null when the key is absent or its
+         * value is not a string. Escaped quotes inside the value are part of
+         * the value, and JSON escape sequences are decoded.
+         */
+        fun extractJsonString(json: String, key: String): String? {
+            // The value part skips over backslash escapes, so an escaped quote
+            // does not end the match early. The key is quoted so regex
+            // metacharacters in it match literally.
+            val pattern =
+                "\"${Regex.escape(key)}\"\\s*:\\s*\"([^\"\\\\]*(?:\\\\.[^\"\\\\]*)*)\"".toRegex()
+            val raw = pattern.find(json)?.groupValues?.getOrNull(1) ?: return null
+            return unescapeJson(raw)
+        }
+
+        /**
+         * Returns the first unsigned integer stored under [key] in [json], or
+         * null when the key is absent or the value does not fit a Long.
+         */
+        fun extractJsonLong(json: String, key: String): Long? {
+            val pattern = "\"${Regex.escape(key)}\"\\s*:\\s*(\\d+)".toRegex()
+            return pattern.find(json)?.groupValues?.getOrNull(1)?.toLongOrNull()
+        }
+
+        /** Decodes the JSON escape sequences in the body of a string literal. */
+        private fun unescapeJson(raw: String): String {
+            if ('\\' !in raw) return raw
+            val out = StringBuilder(raw.length)
+            var i = 0
+            while (i < raw.length) {
+                val c = raw[i]
+                if (c != '\\' || i + 1 >= raw.length) {
+                    out.append(c)
+                    i++
+                    continue
+                }
+                when (val escaped = raw[i + 1]) {
+                    'n' -> out.append('\n')
+                    't' -> out.append('\t')
+                    'r' -> out.append('\r')
+                    'b' -> out.append('\b')
+                    'f' -> out.append('\u000C')
+                    'u' -> {
+                        val hex = raw.substring(i + 2, minOf(i + 6, raw.length))
+                        val code = hex.takeIf { it.length == 4 }?.toIntOrNull(16)
+                        if (code != null) {
+                            out.append(code.toChar())
+                            i += 4
+                        } else {
+                            // Malformed escape: keep it as written.
+                            out.append("\\u")
+                        }
+                    }
+                    // Covers \" \\ and \/ which all stand for the character itself.
+                    else -> out.append(escaped)
+                }
+                i += 2
+            }
+            return out.toString()
+        }
+    }
+}
diff --git a/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraScreen.kt b/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraScreen.kt
index 873bfd5..3f4b4cf 100644
--- a/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraScreen.kt
+++ b/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraScreen.kt
@@ -49,6 +49,7 @@ import androidx.compose.foundation.shape.CircleShape
 import androidx.compose.foundation.shape.RoundedCornerShape
 import androidx.compose.material.icons.Icons
 import androidx.compose.material.icons.filled.AddPhotoAlternate
+import androidx.compose.material.icons.filled.Cast
 import androidx.compose.material.icons.filled.Done
 import androidx.compose.material.icons.filled.Highlight
 import androidx.compose.material3.Button
@@ -104,6 +105,7 @@ import org.fairscan.app.domain.CapturedPage
 import org.fairscan.app.domain.Jpeg
 import org.fairscan.app.domain.PageMetadata
 import org.fairscan.app.domain.Rotation.R0
+import org.fairscan.app.network.stream.StreamState
 import org.fairscan.app.ui.Navigation
 import org.fairscan.app.ui.Screen
 import org.fairscan.app.ui.components.CameraPermissionState
@@ -140,6 +142,10 @@ fun CameraScreen(
     val isTorchEnabled by cameraViewModel.isTorchEnabled.collectAsStateWithLifecycle()
     var torchReapplied by remember {
mutableStateOf(false) } + // Streaming state + val streamState by cameraViewModel.streamState.collectAsStateWithLifecycle() + val streamTargetHost by cameraViewModel.streamTargetHost.collectAsStateWithLifecycle() + val captureController = remember { CameraCaptureController() } DisposableEffect(Unit) { onDispose { @@ -245,6 +251,9 @@ fun CameraScreen( isCameraPermissionGranted = cameraPermission.isGranted, onRequestCameraPermission = { cameraPermission.request() }, onImportClicked = onImportClicked, + streamState = streamState, + streamTargetHost = streamTargetHost, + onToggleStream = { cameraViewModel.toggleStreaming() }, ) } @@ -263,6 +272,9 @@ private fun CameraScreenScaffold( isCameraPermissionGranted: Boolean, onRequestCameraPermission: () -> Unit, onImportClicked: () -> Unit, + streamState: StreamState, + streamTargetHost: String?, + onToggleStream: () -> Unit, ) { var focusPoint by remember { mutableStateOf(null) } LaunchedEffect(focusPoint) { @@ -322,6 +334,15 @@ private fun CameraScreenScaffold( val page = cameraUiState.captureState.capturedPage.pageJpeg.toBitmap() CapturedImage(page.asImageBitmap(), thumbnailCoords) } + // Stream toggle button - top left of the screen + StreamToggleButton( + streamState = streamState, + streamTargetHost = streamTargetHost, + onToggle = onToggleStream, + modifier = Modifier + .align(Alignment.TopStart) + .padding(top = 48.dp, start = 8.dp), + ) } } @@ -405,6 +426,49 @@ private fun CameraPreviewBox( } } +@Composable +private fun StreamToggleButton( + streamState: StreamState, + streamTargetHost: String?, + onToggle: () -> Unit, + modifier: Modifier = Modifier, +) { + val (iconTint, statusText) = when (streamState) { + StreamState.Disconnected -> Color.Gray to "图传未连接" + StreamState.Connecting -> Color(0xFFFFA000) to "图传连接中..." 
+ StreamState.Connected -> Color(0xFF4CAF50) to "图传已连接" + is StreamState.Error -> Color(0xFFE53935) to "图传错误" + } + Column( + modifier = modifier, + horizontalAlignment = Alignment.End, + ) { + IconButton(onClick = onToggle) { + Icon( + imageVector = Icons.Default.Cast, + contentDescription = statusText, + tint = iconTint, + ) + } + if (streamState is StreamState.Connected && streamTargetHost != null) { + Text( + text = streamTargetHost, + color = Color(0xFF4CAF50), + fontSize = 10.sp, + modifier = Modifier.padding(end = 12.dp), + ) + } + if (streamState is StreamState.Error) { + Text( + text = streamState.message, + color = Color(0xFFE53935), + fontSize = 10.sp, + modifier = Modifier.padding(end = 12.dp), + ) + } + } +} + @Composable private fun CapturedImage(image: ImageBitmap, thumbnailCoords: MutableState) { Surface( @@ -718,6 +782,9 @@ private fun ScreenPreview( isCameraPermissionGranted = isCameraPermissionGranted, onRequestCameraPermission = {}, onImportClicked = {}, + streamState = StreamState.Disconnected, + streamTargetHost = null, + onToggleStream = {}, ) } } diff --git a/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraViewModel.kt b/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraViewModel.kt index 45e9424..6028cab 100644 --- a/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraViewModel.kt +++ b/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraViewModel.kt @@ -33,7 +33,15 @@ import kotlinx.coroutines.launch import kotlinx.coroutines.withContext import org.fairscan.app.AppContainer import org.fairscan.app.domain.CapturedPage +import org.fairscan.app.network.ServerEndpoint +import org.fairscan.app.network.stream.FrameCompressor +import org.fairscan.app.network.stream.FrameDropController +import org.fairscan.app.network.stream.StreamState +import org.fairscan.app.network.stream.toPreset import org.fairscan.app.platform.extractDocumentFromBitmap +import org.fairscan.app.ui.screens.settings.StreamFrameRate +import 
org.fairscan.app.ui.screens.settings.StreamQuality +import org.fairscan.app.ui.screens.settings.intervalMs import org.fairscan.imageprocessing.CameraIntrinsics import org.fairscan.imageprocessing.ImageSize import org.fairscan.imageprocessing.OpticalMeasures @@ -51,6 +59,11 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() { private val imageLoader = appContainer.imageLoader private val logger = appContainer.logger + // Streaming components + private val streamClient = appContainer.streamClient + private val frameCompressor = FrameCompressor() + private val frameDropController = FrameDropController() + private val _events = MutableSharedFlow() val events = _events.asSharedFlow() @@ -68,10 +81,49 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() { private val _isTorchEnabled = MutableStateFlow(false) val isTorchEnabled: StateFlow = _isTorchEnabled + // Streaming state + private val _streamState = MutableStateFlow(StreamState.Disconnected) + val streamState: StateFlow = _streamState.asStateFlow() + + private val _streamTargetHost = MutableStateFlow(null) + val streamTargetHost: StateFlow = _streamTargetHost.asStateFlow() + + private var cachedStreamQuality = StreamQuality.BALANCED + private var cachedStreamFrameRate = StreamFrameRate.FPS_10 + init { viewModelScope.launch { imageSegmentationService.initialize() } + // Observe stream client state + viewModelScope.launch { + streamClient.state.collect { state -> + _streamState.value = state + } + } + // Observe stream quality setting + viewModelScope.launch { + settingsRepository.streamQuality.collect { quality -> + cachedStreamQuality = quality + } + } + // Observe stream frame rate setting + viewModelScope.launch { + settingsRepository.streamFrameRate.collect { rate -> + cachedStreamFrameRate = rate + } + } + // Observe server host/port for display + viewModelScope.launch { + kotlinx.coroutines.flow.combine( + settingsRepository.serverHost, + settingsRepository.serverPort, + ) { host, 
port -> + if (host.isNullOrBlank()) null else "$host:$port" + }.collect { host -> + _streamTargetHost.value = host + } + } } fun resetLiveAnalysis() { @@ -103,8 +155,18 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() { viewModelScope.launch { val rotationDegrees = imageProxy.imageInfo.rotationDegrees + val bitmap = imageProxy.toBitmap() + + // Streaming: send frame if connected (fire-and-forget on IO) + val currentHost = _streamTargetHost.value + if (_streamState.value is StreamState.Connected && currentHost != null) { + launch(Dispatchers.IO) { + sendStreamFrame(bitmap) + } + } + val result = withContext(Dispatchers.IO) { - imageSegmentationService.runSegmentationAndReturn(imageProxy.toBitmap()) + imageSegmentationService.runSegmentationAndReturn(bitmap) } result?.let { @@ -230,6 +292,58 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() { importJob = null _importState.value = ImportState.Idle } + + // ── Streaming ── + + fun toggleStreaming() { + viewModelScope.launch { + when (_streamState.value) { + is StreamState.Disconnected, is StreamState.Error -> startStreaming() + is StreamState.Connected -> stopStreaming() + else -> { /* Connecting — ignore */ } + } + } + } + + private suspend fun startStreaming() { + val host = settingsRepository.serverHost.first() + val port = settingsRepository.serverPort.first() + if (host.isNullOrBlank()) { + _streamState.value = StreamState.Error("未配置主机地址") + return + } + frameDropController.reset() + streamClient.connect(ServerEndpoint(host, port)) + } + + private suspend fun stopStreaming() { + streamClient.disconnect() + frameDropController.reset() + } + + private suspend fun sendStreamFrame(bitmap: Bitmap) { + if (_streamState.value !is StreamState.Connected) return + + val preset = cachedStreamQuality.toPreset() + // Use explicit frame rate if set, otherwise fall back to quality preset's default + val intervalMs = cachedStreamFrameRate.intervalMs ?: preset.minIntervalMs + if 
(frameDropController.shouldDrop(intervalMs)) return + + frameDropController.markSending(true) + try { + val compressed = withContext(Dispatchers.IO) { + frameCompressor.compress(bitmap, preset.maxResolution, preset.jpegQuality) + } + if (compressed != null) { + frameDropController.onFrameSent() + streamClient.sendFrame(compressed) + } + } catch (_: Exception) { + // Frame send failed — drop silently, don't affect capture + } finally { + frameDropController.markSending(false) + } + } } sealed class CaptureState { diff --git a/app/src/main/java/org/fairscan/app/ui/screens/export/ExportScreen.kt b/app/src/main/java/org/fairscan/app/ui/screens/export/ExportScreen.kt index dd5274e..0bb3e4d 100644 --- a/app/src/main/java/org/fairscan/app/ui/screens/export/ExportScreen.kt +++ b/app/src/main/java/org/fairscan/app/ui/screens/export/ExportScreen.kt @@ -17,8 +17,11 @@ package org.fairscan.app.ui.screens.export import android.content.ClipData import android.content.ClipboardManager import android.content.Context +import android.net.Uri import android.text.format.Formatter import androidx.activity.compose.BackHandler +import androidx.activity.compose.rememberLauncherForActivityResult +import androidx.activity.result.contract.ActivityResultContracts import androidx.compose.animation.animateColorAsState import androidx.compose.foundation.BorderStroke import androidx.compose.foundation.Image @@ -32,6 +35,7 @@ import androidx.compose.foundation.layout.Spacer import androidx.compose.foundation.layout.fillMaxHeight import androidx.compose.foundation.layout.fillMaxSize import androidx.compose.foundation.layout.fillMaxWidth +import androidx.compose.foundation.layout.PaddingValues import androidx.compose.foundation.layout.heightIn import androidx.compose.foundation.layout.padding import androidx.compose.foundation.layout.size @@ -45,14 +49,18 @@ import androidx.compose.material.icons.automirrored.filled.OpenInNew import androidx.compose.material.icons.filled.CheckCircle import 
androidx.compose.material.icons.filled.Clear import androidx.compose.material.icons.filled.ContentCopy +import androidx.compose.material.icons.filled.CloudUpload import androidx.compose.material.icons.filled.Done import androidx.compose.material.icons.filled.Download +import androidx.compose.material.icons.filled.Error import androidx.compose.material.icons.filled.Share import androidx.compose.material3.Button import androidx.compose.material3.ButtonDefaults import androidx.compose.material3.Card import androidx.compose.material3.CardDefaults import androidx.compose.material3.CircularProgressIndicator +import androidx.compose.material3.LinearProgressIndicator +import androidx.compose.material3.OutlinedButton import androidx.compose.material3.ExperimentalMaterial3Api import androidx.compose.material3.Icon import androidx.compose.material3.IconButton @@ -108,6 +116,7 @@ fun ExportScreenWrapper( uiState: ExportUiState, currentDocument: DocumentUiModel, pdfActions: ExportActions, + taskPanelState: TaskPanelState, onCloseScan: () -> Unit, ) { BackHandler { navigation.back() } @@ -122,24 +131,37 @@ fun ExportScreenWrapper( pdfActions.setFilename(newName) } + val isBusy = uiState.isSaving + || uiState.uploadState is UploadState.Uploading + || taskPanelState.downloadState is DownloadState.Downloading + ExportScreen( onFilenameChange = onFilenameChange, uiState = uiState, currentDocument = currentDocument, navigation = navigation, + taskPanelState = taskPanelState, onShare = { - if (!uiState.isSaving) { - pdfActions.share() - } + if (!isBusy) pdfActions.share() }, onSave = { - if (!uiState.isSaving) { - pdfActions.save() - } + if (!isBusy) pdfActions.save() }, onOpen = pdfActions.open, + onUploadToPc = { + if (!isBusy && uiState.uploadState !is UploadState.Uploading) { + pdfActions.uploadToPc() + } + }, + onUploadAndProcess = { processType -> + if (!isBusy) { + pdfActions.uploadAndProcess(processType) + } + }, + onDownloadResult = pdfActions.downloadResult, + 
onResetDownloadState = pdfActions.resetDownloadState, onCloseScan = { - if (!uiState.isSaving) { + if (!isBusy) { if (uiState.hasSavedOrShared || uiState.isResumedScan) onCloseScan() else @@ -160,9 +182,14 @@ fun ExportScreen( uiState: ExportUiState, currentDocument: DocumentUiModel, navigation: Navigation, + taskPanelState: TaskPanelState = TaskPanelState(), onShare: () -> Unit, onSave: () -> Unit, onOpen: (SavedItem) -> Unit, + onUploadToPc: () -> Unit, + onUploadAndProcess: (processType: String) -> Unit, + onDownloadResult: (RemoteTask, Uri, Context) -> Unit = { _, _, _ -> }, + onResetDownloadState: () -> Unit = {}, onCloseScan: () -> Unit, ) { Scaffold( @@ -187,7 +214,7 @@ fun ExportScreen( ) { PdfInfosAndResultBar(uiState, currentDocument, onOpen, onThumbnailClick) Spacer(Modifier.weight(1f)) // push buttons down - MainActions(onFilenameChange, uiState, onShare, onSave, onCloseScan) + MainActions(onFilenameChange, uiState, onShare, onSave, onUploadToPc, onUploadAndProcess, onCloseScan, taskPanelState, onDownloadResult, onResetDownloadState) } } else { Row( @@ -202,7 +229,7 @@ fun ExportScreen( PdfInfosAndResultBar(uiState, currentDocument, onOpen, onThumbnailClick) } Column(modifier = Modifier.weight(1f)) { - MainActions(onFilenameChange, uiState, onShare, onSave, onCloseScan) + MainActions(onFilenameChange, uiState, onShare, onSave, onUploadToPc, onUploadAndProcess, onCloseScan, taskPanelState, onDownloadResult, onResetDownloadState) } } @@ -363,7 +390,12 @@ private fun MainActions( uiState: ExportUiState, onShare: () -> Unit, onSave: () -> Unit, + onUploadToPc: () -> Unit, + onUploadAndProcess: (processType: String) -> Unit, onCloseScan: () -> Unit, + taskPanelState: TaskPanelState = TaskPanelState(), + onDownloadResult: (RemoteTask, Uri, Context) -> Unit = { _, _, _ -> }, + onResetDownloadState: () -> Unit = {}, ) { Column( verticalArrangement = Arrangement.spacedBy(12.dp) @@ -394,6 +426,13 @@ private fun MainActions( ) } } + + // Upload to PC server + 
UploadToPcSection(uiState, onUploadToPc, onUploadAndProcess) + + // Task management panel + TaskPanelSection(taskPanelState, onDownloadResult, onResetDownloadState) + ExportButton( icon = Icons.Default.Done, text = stringResource(R.string.scan_button), @@ -404,6 +443,432 @@ private fun MainActions( } } +@Composable +private fun UploadToPcSection( + uiState: ExportUiState, + onUploadToPc: () -> Unit, + onUploadAndProcess: (processType: String) -> Unit, +) { + when (val uploadState = uiState.uploadState) { + is UploadState.Idle -> { + if (uiState.result is ExportResult.Pdf) { + Column(verticalArrangement = Arrangement.spacedBy(8.dp)) { + ExportButton( + icon = Icons.Default.CloudUpload, + text = "仅传输到电脑", + onClick = onUploadToPc, + modifier = Modifier.fillMaxWidth(), + isPrimary = false, + ) + Row( + horizontalArrangement = Arrangement.spacedBy(8.dp), + modifier = Modifier.fillMaxWidth() + ) { + ExportButton( + icon = Icons.Default.CloudUpload, + text = "上传并处理 (OCR PDF)", + onClick = { onUploadAndProcess("ocrpdf") }, + modifier = Modifier.weight(1f), + isPrimary = false, + ) + ExportButton( + icon = Icons.Default.CloudUpload, + text = "上传并处理 (Markdown)", + onClick = { onUploadAndProcess("markdown") }, + modifier = Modifier.weight(1f), + isPrimary = false, + ) + } + } + } + } + is UploadState.Uploading -> { + Surface( + shape = RoundedCornerShape(12.dp), + color = MaterialTheme.colorScheme.surfaceVariant.copy(alpha = 0.4f), + tonalElevation = 1.dp, + modifier = Modifier.fillMaxWidth() + ) { + Column( + modifier = Modifier.padding(12.dp), + horizontalAlignment = Alignment.CenterHorizontally, + verticalArrangement = Arrangement.spacedBy(8.dp) + ) { + Text( + "上传到电脑中...", + style = MaterialTheme.typography.bodyMedium + ) + val progressPercent = (uploadState.progress * 100).toInt() + LinearProgressIndicator( + progress = { uploadState.progress }, + modifier = Modifier.fillMaxWidth() + ) + Text( + "$progressPercent%", + style = MaterialTheme.typography.bodySmall, + color 
= MaterialTheme.colorScheme.onSurface.copy(alpha = 0.6f) + ) + } + } + } + is UploadState.Uploaded -> { + Surface( + shape = RoundedCornerShape(12.dp), + color = MaterialTheme.colorScheme.surfaceVariant, + tonalElevation = 0.dp, + modifier = Modifier.fillMaxWidth() + ) { + Column( + modifier = Modifier.padding(12.dp), + verticalArrangement = Arrangement.spacedBy(4.dp) + ) { + Row(verticalAlignment = Alignment.CenterVertically) { + Icon( + imageVector = Icons.Default.CheckCircle, + contentDescription = null, + tint = MaterialTheme.colorScheme.primary, + ) + Spacer(Modifier.width(8.dp)) + Column { + Text("已上传到电脑", style = MaterialTheme.typography.bodyMedium) + val statusText = if (uploadState.taskId != null) { + "处理任务已创建 (${uploadState.taskId.take(8)}...)" + } else { + "仅传输,未处理" + } + Text( + statusText, + style = MaterialTheme.typography.bodySmall, + color = MaterialTheme.colorScheme.onSurface.copy(alpha = 0.5f) + ) + } + } + } + } + } + is UploadState.Error -> { + Surface( + shape = RoundedCornerShape(12.dp), + color = MaterialTheme.colorScheme.errorContainer.copy(alpha = 0.3f), + tonalElevation = 0.dp, + modifier = Modifier.fillMaxWidth() + ) { + Column( + modifier = Modifier.padding(12.dp), + verticalArrangement = Arrangement.spacedBy(8.dp) + ) { + Row(verticalAlignment = Alignment.CenterVertically) { + Icon( + imageVector = Icons.Default.Error, + contentDescription = null, + tint = MaterialTheme.colorScheme.error, + ) + Spacer(Modifier.width(8.dp)) + Text( + "上传失败", + style = MaterialTheme.typography.bodyMedium, + color = MaterialTheme.colorScheme.error + ) + } + Text( + uploadState.message, + style = MaterialTheme.typography.bodySmall, + color = MaterialTheme.colorScheme.onSurface.copy(alpha = 0.6f) + ) + OutlinedButton( + onClick = onUploadToPc, + modifier = Modifier.align(Alignment.End) + ) { + Text("重试") + } + } + } + } + } +} + +@Composable +private fun TaskPanelSection( + taskPanelState: TaskPanelState, + onDownloadResult: (RemoteTask, Uri, Context) -> 
Unit, + onResetDownloadState: () -> Unit, +) { + val context = LocalContext.current + val tasks = taskPanelState.tasks + + // Track selected destination directories per task + val selectedDirs = remember { mutableStateOf(mapOf()) } + // Track which task's directory picker is active + val pickingForTask = remember { mutableStateOf(null) } + + val dirPickerLauncher = rememberLauncherForActivityResult( + contract = ActivityResultContracts.OpenDocumentTree() + ) { uri -> + val taskId = pickingForTask.value + if (taskId != null && uri != null) { + // Take persistent permission + context.contentResolver.takePersistableUriPermission( + uri, + android.content.Intent.FLAG_GRANT_READ_URI_PERMISSION or + android.content.Intent.FLAG_GRANT_WRITE_URI_PERMISSION + ) + selectedDirs.value = selectedDirs.value + (taskId to uri) + } + pickingForTask.value = null + } + + if (tasks.isEmpty()) return + + Surface( + shape = RoundedCornerShape(12.dp), + color = MaterialTheme.colorScheme.surfaceVariant.copy(alpha = 0.4f), + tonalElevation = 1.dp, + modifier = Modifier.fillMaxWidth() + ) { + Column( + modifier = Modifier.padding(12.dp), + verticalArrangement = Arrangement.spacedBy(10.dp) + ) { + Text( + "任务管理", + style = MaterialTheme.typography.titleSmall, + color = MaterialTheme.colorScheme.onSurface.copy(alpha = 0.7f) + ) + + tasks.forEach { task -> + TaskRow( + task = task, + isDownloading = taskPanelState.downloadState is DownloadState.Downloading + && (taskPanelState.downloadState as DownloadState.Downloading).taskId == task.taskId, + downloadProgress = if (taskPanelState.downloadState is DownloadState.Downloading + && (taskPanelState.downloadState as DownloadState.Downloading).taskId == task.taskId + ) (taskPanelState.downloadState as DownloadState.Downloading).progress else 0f, + isDownloaded = taskPanelState.downloadState is DownloadState.Downloaded + && (taskPanelState.downloadState as DownloadState.Downloaded).taskId == task.taskId, + downloadedUri = if 
(taskPanelState.downloadState is DownloadState.Downloaded + && (taskPanelState.downloadState as DownloadState.Downloaded).taskId == task.taskId + ) (taskPanelState.downloadState as DownloadState.Downloaded).fileUri else null, + downloadError = if (taskPanelState.downloadState is DownloadState.Error + && (taskPanelState.downloadState as DownloadState.Error).taskId == task.taskId) + (taskPanelState.downloadState as DownloadState.Error).message else null, + selectedDirUri = selectedDirs.value[task.taskId], + onSelectDir = { + pickingForTask.value = task.taskId + dirPickerLauncher.launch(null) + }, + onDownload = { destUri -> + onDownloadResult(task, destUri, context) + }, + onOpenDownloaded = { + // Open the downloaded file + val intent = android.content.Intent(android.content.Intent.ACTION_VIEW).apply { + val uri = if (taskPanelState.downloadState is DownloadState.Downloaded + && (taskPanelState.downloadState as DownloadState.Downloaded).taskId == task.taskId + ) (taskPanelState.downloadState as DownloadState.Downloaded).fileUri else return@apply + setDataAndType(uri, when (task.processType) { + "markdown" -> "application/zip" + else -> "application/pdf" + }) + addFlags(android.content.Intent.FLAG_GRANT_READ_URI_PERMISSION) + } + try { + context.startActivity(intent) + } catch (_: Exception) { + // No app to handle this file type + } + }, + onDismissError = onResetDownloadState, + ) + } + } + } +} + +@Composable +private fun TaskRow( + task: RemoteTask, + isDownloading: Boolean, + downloadProgress: Float, + isDownloaded: Boolean, + downloadedUri: android.net.Uri?, + downloadError: String?, + selectedDirUri: android.net.Uri?, + onSelectDir: () -> Unit, + onDownload: (Uri) -> Unit, + onOpenDownloaded: () -> Unit, + onDismissError: () -> Unit, +) { + Column(verticalArrangement = Arrangement.spacedBy(4.dp)) { + // Row 1: file name + type badge + status + Row( + verticalAlignment = Alignment.CenterVertically, + horizontalArrangement = Arrangement.spacedBy(6.dp) + ) { + 
Text( + text = task.fileName.ifEmpty { task.taskId.take(8) + "..." }, + style = MaterialTheme.typography.bodySmall, + modifier = Modifier.weight(1f), + maxLines = 1, + ) + + // Process type badge + Surface( + shape = RoundedCornerShape(4.dp), + color = MaterialTheme.colorScheme.primary.copy(alpha = 0.15f), + ) { + Text( + text = task.processType, + style = MaterialTheme.typography.labelSmall, + modifier = Modifier.padding(horizontal = 6.dp, vertical = 2.dp), + color = MaterialTheme.colorScheme.primary, + ) + } + + // Status badge + val (statusText, statusColor) = when (task.status) { + "queued" -> "排队中" to Color(0xFFFFA726) + "processing" -> "处理中" to Color(0xFF42A5F5) + "completed" -> "已完成" to Color(0xFF66BB6A) + "failed" -> "失败" to Color(0xFFEF5350) + else -> task.status to Color.Gray + } + Text( + text = statusText, + style = MaterialTheme.typography.labelSmall, + color = statusColor, + ) + } + + // Row 2: Progress bar (for processing tasks) + if (task.status == "processing") { + LinearProgressIndicator( + progress = { task.progress / 100f }, + modifier = Modifier.fillMaxWidth() + ) + } + + // Row 3: Error message (for failed tasks) + if (task.status == "failed" && task.message.isNotEmpty()) { + Text( + text = task.message, + style = MaterialTheme.typography.bodySmall, + color = MaterialTheme.colorScheme.error.copy(alpha = 0.8f), + maxLines = 2, + ) + } + + // Row 4: Download actions for completed tasks + if (task.status == "completed" && !isDownloaded) { + Row( + horizontalArrangement = Arrangement.spacedBy(8.dp), + modifier = Modifier.fillMaxWidth() + ) { + if (selectedDirUri != null) { + Button( + onClick = { onDownload(selectedDirUri) }, + enabled = !isDownloading, + contentPadding = PaddingValues( + start = 10.dp, end = 10.dp, top = 4.dp, bottom = 4.dp + ), + modifier = Modifier.heightIn(min = 32.dp) + ) { + if (isDownloading) { + CircularProgressIndicator( + modifier = Modifier.size(14.dp), + strokeWidth = 2.dp, + color = 
MaterialTheme.colorScheme.onPrimary, + ) + Spacer(Modifier.width(4.dp)) + Text( + "${(downloadProgress * 100).toInt()}%", + style = MaterialTheme.typography.labelSmall + ) + } else { + Icon(Icons.Default.Download, contentDescription = null, modifier = Modifier.size(14.dp)) + Spacer(Modifier.width(4.dp)) + Text("下载", style = MaterialTheme.typography.labelSmall) + } + } + } + + OutlinedButton( + onClick = onSelectDir, + contentPadding = PaddingValues( + start = 10.dp, end = 10.dp, top = 4.dp, bottom = 4.dp + ), + modifier = Modifier.heightIn(min = 32.dp) + ) { + Icon( + Icons.Default.Download, // reuse download icon for simplicity + contentDescription = null, + modifier = Modifier.size(14.dp) + ) + Spacer(Modifier.width(4.dp)) + Text( + if (selectedDirUri != null) "已选目录" else "选择目录", + style = MaterialTheme.typography.labelSmall + ) + } + } + } + + // Row 5: Downloaded state with open button + if (isDownloaded) { + Row( + verticalAlignment = Alignment.CenterVertically, + horizontalArrangement = Arrangement.spacedBy(8.dp) + ) { + Icon( + Icons.Default.CheckCircle, + contentDescription = null, + tint = MaterialTheme.colorScheme.primary, + modifier = Modifier.size(14.dp) + ) + Text( + "已下载", + style = MaterialTheme.typography.labelSmall, + color = MaterialTheme.colorScheme.primary, + ) + OutlinedButton( + onClick = onOpenDownloaded, + contentPadding = PaddingValues( + start = 10.dp, end = 10.dp, top = 4.dp, bottom = 4.dp + ), + modifier = Modifier.heightIn(min = 32.dp) + ) { + Icon( + Icons.AutoMirrored.Filled.OpenInNew, + contentDescription = null, + modifier = Modifier.size(14.dp) + ) + Spacer(Modifier.width(4.dp)) + Text("打开", style = MaterialTheme.typography.labelSmall) + } + } + } + + // Row 6: Download error + if (downloadError != null) { + Row( + verticalAlignment = Alignment.CenterVertically, + horizontalArrangement = Arrangement.spacedBy(4.dp) + ) { + Icon( + Icons.Default.Error, + contentDescription = null, + tint = MaterialTheme.colorScheme.error, + modifier 
= Modifier.size(12.dp) + ) + Text( + downloadError, + style = MaterialTheme.typography.bodySmall, + color = MaterialTheme.colorScheme.error, + ) + } + } + } +} + @Composable private fun ActionSurface( modifier: Modifier = Modifier, @@ -716,6 +1181,8 @@ fun ExportPreviewToCustomize(uiState: ExportUiState) { onShare = {}, onSave = {}, onOpen = {}, + onUploadToPc = {}, + onUploadAndProcess = {}, onCloseScan = {}, ) } diff --git a/app/src/main/java/org/fairscan/app/ui/screens/export/ExportUiState.kt b/app/src/main/java/org/fairscan/app/ui/screens/export/ExportUiState.kt index c60d1fc..b7e64da 100644 --- a/app/src/main/java/org/fairscan/app/ui/screens/export/ExportUiState.kt +++ b/app/src/main/java/org/fairscan/app/ui/screens/export/ExportUiState.kt @@ -27,10 +27,55 @@ data class ExportUiState( val hasShared: Boolean = false, val error: ExportError? = null, val isResumedScan: Boolean = false, + // Upload to PC + val uploadState: UploadState = UploadState.Idle, + // Task management panel + val taskPanelState: TaskPanelState = TaskPanelState(), ) { val hasSavedOrShared get() = savedBundle != null || hasShared } +/** State of the PDF upload to PC server. */ +sealed class UploadState { + /** No upload in progress. */ + data object Idle : UploadState() + + /** Upload is in progress with [progress] 0.0..1.0. */ + data class Uploading(val progress: Float) : UploadState() + + /** Upload completed successfully. taskId is set when processing was also requested. */ + data class Uploaded(val fileId: String, val taskId: String? = null) : UploadState() + + /** Upload failed with an error message. */ + data class Error(val message: String) : UploadState() +} + +/** A remote processing task displayed in the task management panel. 
*/ +data class RemoteTask( + val fileId: String, + val taskId: String, + val processType: String, // "ocrpdf" | "markdown" + val status: String, // "queued" | "processing" | "completed" | "failed" + val progress: Int, // 0..100 + val fileName: String = "", + val message: String = "", +) + +/** State for the task management panel. */ +data class TaskPanelState( + val tasks: List = emptyList(), + val downloadState: DownloadState = DownloadState.Idle, +) + +/** Download state for a task artifact. */ +sealed class DownloadState { + data object Idle : DownloadState() + data class Downloading(val taskId: String, val progress: Float) : DownloadState() + /** Download completed, providing the local file URI for the user to open. */ + data class Downloaded(val taskId: String, val fileUri: android.net.Uri) : DownloadState() + data class Error(val taskId: String, val message: String) : DownloadState() +} + data class SavedItem( val uri: Uri, val fileName: String, diff --git a/app/src/main/java/org/fairscan/app/ui/screens/export/ExportViewModel.kt b/app/src/main/java/org/fairscan/app/ui/screens/export/ExportViewModel.kt index 26a1407..38023ce 100644 --- a/app/src/main/java/org/fairscan/app/ui/screens/export/ExportViewModel.kt +++ b/app/src/main/java/org/fairscan/app/ui/screens/export/ExportViewModel.kt @@ -31,6 +31,7 @@ import kotlinx.collections.immutable.ImmutableList import kotlinx.collections.immutable.toImmutableList import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.Job +import kotlinx.coroutines.delay import kotlinx.coroutines.flow.MutableSharedFlow import kotlinx.coroutines.flow.MutableStateFlow import kotlinx.coroutines.flow.StateFlow @@ -48,6 +49,9 @@ import org.fairscan.app.domain.ExportQuality import org.fairscan.app.domain.PageViewKey import org.fairscan.app.domain.pagesToExport import org.fairscan.app.ui.screens.settings.ExportFormat +import org.fairscan.app.network.ServerEndpoint +import org.fairscan.app.network.tasks.TaskClient +import 
org.fairscan.app.network.upload.PdfUploadClient import java.io.File import java.io.FileInputStream import java.io.IOException @@ -69,6 +73,8 @@ class ExportViewModel(container: AppContainer, val imageRepository: ImageReposit private val fileManager = container.fileManager private val settingsRepository = container.settingsRepository private val logger = container.logger + private val pdfUploadClient = container.pdfUploadClient + private val taskClient = container.taskClient private val _events = MutableSharedFlow() val events = _events.asSharedFlow() @@ -93,6 +99,12 @@ class ExportViewModel(container: AppContainer, val imageRepository: ImageReposit private val _uiState = MutableStateFlow(ExportUiState()) val uiState: StateFlow = _uiState.asStateFlow() + // Task management panel + private val _taskPanelState = MutableStateFlow(TaskPanelState()) + val taskPanelState: StateFlow = _taskPanelState.asStateFlow() + + private val activePollingJobs = mutableMapOf() + private var resumedScanKeys: List = emptyList() init { viewModelScope.launch { @@ -397,6 +409,236 @@ class ExportViewModel(container: AppContainer, val imageRepository: ImageReposit fileManager.cleanUpOldFiles(thresholdInMillis) } + fun uploadPdfToServer() { + val result = _uiState.value.result ?: return + if (result !is ExportResult.Pdf) return + + viewModelScope.launch { + _uiState.update { it.copy(uploadState = UploadState.Uploading(0f)) } + try { + val endpoint = resolveServerEndpoint() ?: return@launch + + val uploadResult = withContext(Dispatchers.IO) { + pdfUploadClient.uploadPdf(endpoint, result.file) { progress -> + _uiState.update { it.copy(uploadState = UploadState.Uploading(progress)) } + } + } + + _uiState.update { + it.copy(uploadState = UploadState.Uploaded(uploadResult.fileId)) + } + android.util.Log.i("Upload", "PDF uploaded, fileId=${uploadResult.fileId}") + } catch (e: CancellationException) { + throw e + } catch (e: Exception) { + logger.e("Upload", "Failed to upload PDF", e) + 
_uiState.update { + it.copy(uploadState = UploadState.Error(e.message ?: "上传失败")) + } + } + } + } + + fun uploadAndProcess(processType: String = "ocrpdf") { + val result = _uiState.value.result ?: return + if (result !is ExportResult.Pdf) return + + viewModelScope.launch { + _uiState.update { it.copy(uploadState = UploadState.Uploading(0f)) } + try { + val endpoint = resolveServerEndpoint() ?: return@launch + + // Step 1: Upload + val uploadResult = withContext(Dispatchers.IO) { + pdfUploadClient.uploadPdf(endpoint, result.file) { progress -> + _uiState.update { it.copy(uploadState = UploadState.Uploading(progress)) } + } + } + + // Step 2: Create processing task + val taskResult = withContext(Dispatchers.IO) { + taskClient.processPdf(endpoint, uploadResult.fileId, processType) + } + + // Step 3: Add task to panel + val remoteTask = RemoteTask( + fileId = uploadResult.fileId, + taskId = taskResult.taskId, + processType = processType, + status = "queued", + progress = 0, + fileName = result.file.name, + ) + _taskPanelState.update { state -> + state.copy(tasks = state.tasks + remoteTask) + } + + // Step 4: Start background polling + startPolling(remoteTask, endpoint) + + _uiState.update { + it.copy(uploadState = UploadState.Uploaded(uploadResult.fileId, taskResult.taskId)) + } + android.util.Log.i("Upload", "PDF uploaded + task created: file=${uploadResult.fileId}, task=${taskResult.taskId}") + } catch (e: CancellationException) { + throw e + } catch (e: Exception) { + logger.e("Upload", "Failed to upload & process PDF", e) + _uiState.update { + it.copy(uploadState = UploadState.Error(e.message ?: "上传处理失败")) + } + } + } + } + + private fun startPolling(task: RemoteTask, endpoint: ServerEndpoint) { + val job = viewModelScope.launch { + try { + while (true) { + delay(2000) + val taskStatus = withContext(Dispatchers.IO) { + taskClient.getTaskStatus(endpoint, task.taskId) + } + _taskPanelState.update { state -> + val updated = state.tasks.map { t -> + if (t.taskId == 
task.taskId) { + t.copy( + status = taskStatus.status, + progress = taskStatus.progress, + message = taskStatus.message, + ) + } else t + } + state.copy(tasks = updated) + } + if (taskStatus.status == "completed" || taskStatus.status == "failed") { + activePollingJobs.remove(task.taskId) + return@launch + } + } + } catch (e: CancellationException) { + // Polling cancelled + } catch (e: Exception) { + _taskPanelState.update { state -> + val updated = state.tasks.map { t -> + if (t.taskId == task.taskId) { + t.copy(status = "failed", message = e.message ?: "轮询失败") + } else t + } + state.copy(tasks = updated) + } + activePollingJobs.remove(task.taskId) + } + } + activePollingJobs[task.taskId] = job + } + + fun downloadResult(task: RemoteTask, destDirUri: Uri, context: Context) { + viewModelScope.launch { + _taskPanelState.update { it.copy(downloadState = DownloadState.Downloading(task.taskId, 0f)) } + try { + val endpoint = resolveServerEndpointForTask(task.taskId) ?: return@launch + + // List artifacts to find the preferred one + val artifacts = withContext(Dispatchers.IO) { + taskClient.listArtifacts(endpoint, task.taskId) + } + + // Prefer ZIP for markdown, PDF for ocrpdf + val preferredArtifact = if (task.processType == "markdown") { + artifacts.find { it.fileType == "zip" } ?: artifacts.firstOrNull() + } else { + artifacts.find { it.fileType == "pdf" } ?: artifacts.firstOrNull() + } + + if (preferredArtifact == null) { + _taskPanelState.update { it.copy(downloadState = DownloadState.Error(task.taskId, "没有可下载的产物")) } + return@launch + } + + // Download to a temp file first, then copy to SAF + val tempFile = File(preparationDir, preferredArtifact.fileName) + withContext(Dispatchers.IO) { + taskClient.downloadArtifact(endpoint, preferredArtifact.artifactId, tempFile) { progress -> + _taskPanelState.update { + it.copy(downloadState = DownloadState.Downloading(task.taskId, progress)) + } + } + } + + // Copy to SAF directory + val safFile = withContext(Dispatchers.IO) 
{ + val tree = DocumentFile.fromTreeUri(context, destDirUri) + ?: throw IllegalStateException("Invalid SAF directory") + val target = tree.createFile( + preferredArtifact.fileType.let { + when (it) { + "zip" -> "application/zip" + "pdf" -> "application/pdf" + else -> "text/markdown" + } + }, + preferredArtifact.fileName + ) ?: throw IllegalStateException("Unable to create file in SAF directory") + + context.contentResolver.openOutputStream(target.uri)?.use { output -> + tempFile.inputStream().use { input -> + input.copyTo(output) + } + } ?: throw IllegalStateException("Failed to open SAF output stream") + + target + } + + // Clean up temp file + tempFile.delete() + + _taskPanelState.update { + it.copy(downloadState = DownloadState.Downloaded(task.taskId, safFile.uri)) + } + } catch (e: CancellationException) { + throw e + } catch (e: Exception) { + logger.e("Download", "Failed to download artifact", e) + _taskPanelState.update { + it.copy(downloadState = DownloadState.Error(task.taskId, e.message ?: "下载失败")) + } + } + } + } + + fun resetDownloadState() { + _taskPanelState.update { it.copy(downloadState = DownloadState.Idle) } + } + + private suspend fun resolveServerEndpointForTask(taskId: String): ServerEndpoint? { + val host = settingsRepository.serverHost.first() + val port = settingsRepository.serverPort.first() + if (host.isNullOrBlank()) { + _taskPanelState.update { + it.copy(downloadState = DownloadState.Error(taskId, "未配置服务器地址")) + } + return null + } + return ServerEndpoint(host, port) + } + + private suspend fun resolveServerEndpoint(): ServerEndpoint? 
{ + val host = settingsRepository.serverHost.first() + val port = settingsRepository.serverPort.first() + if (host.isNullOrBlank()) { + _uiState.update { + it.copy(uploadState = UploadState.Error("未配置服务器地址")) + } + return null + } + return ServerEndpoint(host, port) + } + + fun resetUploadState() { + _uiState.update { it.copy(uploadState = UploadState.Idle) } + } + private fun resolveExportDirName(context: Context, exportDirUri: Uri?): String? { return if (exportDirUri == null) { null @@ -443,6 +685,10 @@ data class ExportActions( val share: () -> Unit, val save: () -> Unit, val open: (SavedItem) -> Unit, + val uploadToPc: () -> Unit, + val uploadAndProcess: (processType: String) -> Unit, + val downloadResult: (RemoteTask, Uri, Context) -> Unit = { _, _, _ -> }, + val resetDownloadState: () -> Unit = {}, ) class MissingExportDirPermissionException( diff --git a/app/src/main/java/org/fairscan/app/ui/screens/settings/SettingsRepository.kt b/app/src/main/java/org/fairscan/app/ui/screens/settings/SettingsRepository.kt index c503a41..585e571 100644 --- a/app/src/main/java/org/fairscan/app/ui/screens/settings/SettingsRepository.kt +++ b/app/src/main/java/org/fairscan/app/ui/screens/settings/SettingsRepository.kt @@ -35,6 +35,16 @@ class SettingsRepository(private val context: Context) { private val EXPORT_FORMAT = stringPreferencesKey("export_format") private val EXPORT_QUALITY = stringPreferencesKey("export_quality") + // Network collaboration settings + private val SERVER_HOST = stringPreferencesKey("server_host") + private val SERVER_PORT = stringPreferencesKey("server_port") + private val SERVER_DISPLAY_NAME = stringPreferencesKey("server_display_name") + private val LAST_SELECTED_SERVICE_ID = stringPreferencesKey("last_selected_service_id") + private val STREAM_QUALITY = stringPreferencesKey("stream_quality") + private val POST_PROCESS_MODE = stringPreferencesKey("post_process_mode") + private val AUTO_DOWNLOAD_PROCESSED_RESULT = 
stringPreferencesKey("auto_download_processed_result") + private val STREAM_FRAME_RATE = stringPreferencesKey("stream_frame_rate") + val defaultColorMode: Flow = context.dataStore.data.map { prefs -> when (prefs[DEFAULT_COLOR_MODE]) { @@ -73,6 +83,61 @@ class SettingsRepository(private val context: Context) { } } + val serverHost: Flow = + context.dataStore.data.map { prefs -> + prefs[SERVER_HOST] + } + + val serverPort: Flow = + context.dataStore.data.map { prefs -> + prefs[SERVER_PORT]?.toIntOrNull() ?: 2026 + } + + val serverDisplayName: Flow = + context.dataStore.data.map { prefs -> + prefs[SERVER_DISPLAY_NAME] + } + + val lastSelectedServiceId: Flow = + context.dataStore.data.map { prefs -> + prefs[LAST_SELECTED_SERVICE_ID] + } + + val streamQuality: Flow = + context.dataStore.data.map { prefs -> + when (prefs[STREAM_QUALITY]) { + "LOW" -> StreamQuality.LOW + "HIGH" -> StreamQuality.HIGH + "BALANCED", null -> StreamQuality.BALANCED + else -> StreamQuality.BALANCED + } + } + + val postProcessMode: Flow = + context.dataStore.data.map { prefs -> + when (prefs[POST_PROCESS_MODE]) { + "MARKDOWN" -> PostProcessMode.MARKDOWN + "OCRPDF", null -> PostProcessMode.OCRPDF + else -> PostProcessMode.OCRPDF + } + } + + val autoDownloadProcessedResult: Flow = + context.dataStore.data.map { prefs -> + prefs[AUTO_DOWNLOAD_PROCESSED_RESULT]?.toBoolean() ?: false + } + + val streamFrameRate: Flow = + context.dataStore.data.map { prefs -> + when (prefs[STREAM_FRAME_RATE]) { + "UNLIMITED" -> StreamFrameRate.UNLIMITED + "FPS_15" -> StreamFrameRate.FPS_15 + "FPS_5" -> StreamFrameRate.FPS_5 + "FPS_10", null -> StreamFrameRate.FPS_10 + else -> StreamFrameRate.FPS_10 + } + } + suspend fun setDefaultColorMode(mode: DefaultColorMode) { context.dataStore.edit { prefs -> prefs[DEFAULT_COLOR_MODE] = mode.name @@ -100,6 +165,66 @@ class SettingsRepository(private val context: Context) { prefs[EXPORT_QUALITY] = quality.name } } + + suspend fun setServerHost(host: String?) 
{ + context.dataStore.edit { prefs -> + if (host == null) { + prefs.remove(SERVER_HOST) + } else { + prefs[SERVER_HOST] = host + } + } + } + + suspend fun setServerPort(port: Int) { + context.dataStore.edit { prefs -> + prefs[SERVER_PORT] = port.toString() + } + } + + suspend fun setServerDisplayName(name: String?) { + context.dataStore.edit { prefs -> + if (name == null) { + prefs.remove(SERVER_DISPLAY_NAME) + } else { + prefs[SERVER_DISPLAY_NAME] = name + } + } + } + + suspend fun setLastSelectedServiceId(id: String?) { + context.dataStore.edit { prefs -> + if (id == null) { + prefs.remove(LAST_SELECTED_SERVICE_ID) + } else { + prefs[LAST_SELECTED_SERVICE_ID] = id + } + } + } + + suspend fun setStreamQuality(quality: StreamQuality) { + context.dataStore.edit { prefs -> + prefs[STREAM_QUALITY] = quality.name + } + } + + suspend fun setPostProcessMode(mode: PostProcessMode) { + context.dataStore.edit { prefs -> + prefs[POST_PROCESS_MODE] = mode.name + } + } + + suspend fun setAutoDownloadProcessedResult(enabled: Boolean) { + context.dataStore.edit { prefs -> + prefs[AUTO_DOWNLOAD_PROCESSED_RESULT] = enabled.toString() + } + } + + suspend fun setStreamFrameRate(rate: StreamFrameRate) { + context.dataStore.edit { prefs -> + prefs[STREAM_FRAME_RATE] = rate.name + } + } } enum class DefaultColorMode(val colorMode: ColorMode?, val labelResource: Int) { @@ -112,3 +237,28 @@ enum class ExportFormat(val mimeType: String) { PDF("application/pdf"), JPEG("image/jpeg"), } + +enum class StreamQuality { + LOW, + BALANCED, + HIGH, +} + +enum class PostProcessMode { + MARKDOWN, + OCRPDF, +} + +enum class StreamFrameRate(val labelRes: Int, val uiLabel: String) { + UNLIMITED(0, "无限制"), + FPS_15(0, "15 fps"), + FPS_10(0, "10 fps"), + FPS_5(0, "5 fps"), +} + +val StreamFrameRate.intervalMs: Long? 
get() = when (this) { + StreamFrameRate.UNLIMITED -> null + StreamFrameRate.FPS_15 -> 66L + StreamFrameRate.FPS_10 -> 100L + StreamFrameRate.FPS_5 -> 200L +} diff --git a/app/src/main/java/org/fairscan/app/ui/screens/settings/SettingsScreen.kt b/app/src/main/java/org/fairscan/app/ui/screens/settings/SettingsScreen.kt index 099a597..71525e0 100644 --- a/app/src/main/java/org/fairscan/app/ui/screens/settings/SettingsScreen.kt +++ b/app/src/main/java/org/fairscan/app/ui/screens/settings/SettingsScreen.kt @@ -30,21 +30,29 @@ import androidx.compose.foundation.verticalScroll import androidx.compose.material.icons.Icons import androidx.compose.material.icons.filled.Folder import androidx.compose.material3.Card +import androidx.compose.material3.Checkbox import androidx.compose.material3.ExperimentalMaterial3Api import androidx.compose.material3.HorizontalDivider import androidx.compose.material3.Icon import androidx.compose.material3.MaterialTheme import androidx.compose.material3.OutlinedButton +import androidx.compose.material3.OutlinedTextField import androidx.compose.material3.RadioButton import androidx.compose.material3.Scaffold import androidx.compose.material3.Text import androidx.compose.material3.TopAppBar import androidx.compose.runtime.Composable +import androidx.compose.runtime.LaunchedEffect +import androidx.compose.runtime.getValue +import androidx.compose.runtime.mutableStateOf +import androidx.compose.runtime.remember +import androidx.compose.runtime.setValue import androidx.compose.ui.Alignment import androidx.compose.ui.Modifier import androidx.compose.ui.graphics.Color import androidx.compose.ui.platform.LocalResources import androidx.compose.ui.res.stringResource +import androidx.compose.ui.text.input.KeyboardType import androidx.compose.ui.tooling.preview.Preview import androidx.compose.ui.unit.dp import org.fairscan.app.R @@ -61,6 +69,14 @@ fun SettingsScreen( onResetExportDirClick: () -> Unit, onExportFormatChanged: (ExportFormat) -> Unit, 
onExportQualityChanged: (ExportQuality) -> Unit, + onServerHostChanged: (String?) -> Unit, + onServerPortChanged: (Int) -> Unit, + onStreamQualityChanged: (StreamQuality) -> Unit, + onPostProcessModeChanged: (PostProcessMode) -> Unit, + onAutoDownloadChanged: (Boolean) -> Unit, + onStreamFrameRateChanged: (StreamFrameRate) -> Unit, + onScanNetworkHostsClick: () -> Unit, + onTestConnectionClick: () -> Unit, onBack: () -> Unit, ) { BackHandler { onBack() } @@ -79,6 +95,14 @@ fun SettingsScreen( onResetExportDirClick, onExportFormatChanged, onExportQualityChanged, + onServerHostChanged, + onServerPortChanged, + onStreamQualityChanged, + onPostProcessModeChanged, + onAutoDownloadChanged, + onStreamFrameRateChanged, + onScanNetworkHostsClick, + onTestConnectionClick, modifier = Modifier.padding(paddingValues)) } } @@ -91,6 +115,14 @@ private fun SettingsContent( onResetExportDirClick: () -> Unit, onExportFormatChanged: (ExportFormat) -> Unit, onExportQualityChanged: (ExportQuality) -> Unit, + onServerHostChanged: (String?) 
-> Unit, + onServerPortChanged: (Int) -> Unit, + onStreamQualityChanged: (StreamQuality) -> Unit, + onPostProcessModeChanged: (PostProcessMode) -> Unit, + onAutoDownloadChanged: (Boolean) -> Unit, + onStreamFrameRateChanged: (StreamFrameRate) -> Unit, + onScanNetworkHostsClick: () -> Unit, + onTestConnectionClick: () -> Unit, modifier: Modifier = Modifier, ) { val (folderLabel, folderLabelColor) = when { @@ -170,6 +202,157 @@ private fun SettingsContent( label = { t -> t.name}, selectedValue = uiState.exportFormat ) + + Spacer(Modifier.height(16.dp)) + HorizontalDivider() + Spacer(Modifier.height(16.dp)) + + Text(stringResource(R.string.settings_section_network), style = MaterialTheme.typography.titleLarge) + Spacer(Modifier.height(16.dp)) + + // Server configuration + Column { + Text("PC 服务器设置", style = MaterialTheme.typography.titleMedium) + Spacer(Modifier.height(8.dp)) + + var hostInput by remember { mutableStateOf(uiState.serverHost ?: "") } + LaunchedEffect(uiState.serverHost) { + hostInput = uiState.serverHost ?: "" + } + OutlinedTextField( + value = hostInput, + onValueChange = { + hostInput = it + onServerHostChanged(it.ifEmpty { null }) + }, + label = { Text("主机地址") }, + modifier = Modifier + .fillMaxWidth() + .padding(bottom = 8.dp), + singleLine = true + ) + + var portInput by remember { mutableStateOf(uiState.serverPort.toString()) } + LaunchedEffect(uiState.serverPort) { + portInput = uiState.serverPort.toString() + } + OutlinedTextField( + value = portInput, + onValueChange = { newValue -> + portInput = newValue + newValue.toIntOrNull()?.let { onServerPortChanged(it) } + }, + label = { Text("端口") }, + modifier = Modifier + .fillMaxWidth() + .padding(bottom = 8.dp), + singleLine = true + ) + + if (uiState.serverDisplayName != null) { + Text( + "已连接: ${uiState.serverDisplayName}", + style = MaterialTheme.typography.bodySmall, + color = MaterialTheme.colorScheme.primary, + modifier = Modifier.padding(bottom = 8.dp) + ) + } + + Row( + modifier = Modifier 
+ .fillMaxWidth() + .padding(bottom = 16.dp), + horizontalArrangement = Arrangement.spacedBy(8.dp) + ) { + OutlinedButton( + onClick = onScanNetworkHostsClick, + modifier = Modifier.weight(1f) + ) { + Text("扫描主机") + } + OutlinedButton( + onClick = onTestConnectionClick, + modifier = Modifier.weight(1f) + ) { + Text("测试连接") + } + } + } + + Spacer(Modifier.height(16.dp)) + + // Stream quality + RadioButtonGroup( + R.string.stream_quality, + StreamQuality.entries, + onClick = onStreamQualityChanged, + label = { t -> when (t) { + StreamQuality.LOW -> "低 (640p, 45%, 8-12fps)" + StreamQuality.BALANCED -> "均衡 (960p, 60%, 6-10fps)" + StreamQuality.HIGH -> "高 (1280p, 75%, 5-8fps)" + } }, + selectedValue = uiState.streamQuality + ) + + Spacer(Modifier.height(16.dp)) + + // Post process mode + RadioButtonGroup( + R.string.post_process_mode, + PostProcessMode.entries, + onClick = onPostProcessModeChanged, + label = { t -> when (t) { + PostProcessMode.MARKDOWN -> "Markdown (MinerU)" + PostProcessMode.OCRPDF -> "OCR PDF (OCRmyPDF)" + } }, + selectedValue = uiState.postProcessMode + ) + + Spacer(Modifier.height(16.dp)) + + // Stream frame rate control + Text("图传帧率", style = MaterialTheme.typography.titleMedium) + Spacer(Modifier.height(4.dp)) + StreamFrameRate.entries.forEach { rate -> + Row( + modifier = Modifier + .fillMaxWidth() + .clickable { onStreamFrameRateChanged(rate) } + .padding(vertical = 4.dp), + verticalAlignment = Alignment.CenterVertically + ) { + RadioButton( + selected = uiState.streamFrameRate == rate, + onClick = null, + modifier = Modifier.padding(horizontal = 8.dp, vertical = 0.dp) + ) + val desc = when (rate) { + StreamFrameRate.UNLIMITED -> "无限制(每帧都发)" + StreamFrameRate.FPS_15 -> "15 fps(66ms 间隔)" + StreamFrameRate.FPS_10 -> "10 fps(100ms 间隔)" + StreamFrameRate.FPS_5 -> "5 fps(200ms 间隔)" + } + Text(desc, style = MaterialTheme.typography.bodyMedium) + } + } + + Spacer(Modifier.height(16.dp)) + + // Auto download + Row( + modifier = Modifier + 
.fillMaxWidth() + .clickable { onAutoDownloadChanged(!uiState.autoDownloadProcessedResult) } + .padding(vertical = 8.dp), + verticalAlignment = Alignment.CenterVertically, + horizontalArrangement = Arrangement.SpaceBetween + ) { + Text("自动下载处理结果") + Checkbox( + checked = uiState.autoDownloadProcessedResult, + onCheckedChange = { onAutoDownloadChanged(it) } + ) + } } } @@ -267,6 +450,14 @@ fun SettingsScreenPreview(uiState: SettingsUiState) { onResetExportDirClick = {}, onExportFormatChanged = {}, onExportQualityChanged = {}, + onServerHostChanged = {}, + onServerPortChanged = {}, + onStreamQualityChanged = {}, + onPostProcessModeChanged = {}, + onAutoDownloadChanged = {}, + onStreamFrameRateChanged = {}, + onScanNetworkHostsClick = {}, + onTestConnectionClick = {}, onBack = {} ) } diff --git a/app/src/main/java/org/fairscan/app/ui/screens/settings/SettingsViewModel.kt b/app/src/main/java/org/fairscan/app/ui/screens/settings/SettingsViewModel.kt index 40aba04..6165f8c 100644 --- a/app/src/main/java/org/fairscan/app/ui/screens/settings/SettingsViewModel.kt +++ b/app/src/main/java/org/fairscan/app/ui/screens/settings/SettingsViewModel.kt @@ -32,6 +32,15 @@ data class SettingsUiState( val exportDirName: String? = null, val exportFormat: ExportFormat = ExportFormat.PDF, val exportQuality: ExportQuality = ExportQuality.BALANCED, + // Network collaboration settings + val serverHost: String? = null, + val serverPort: Int = 2026, + val serverDisplayName: String? = null, + val lastSelectedServiceId: String? 
= null, + val streamQuality: StreamQuality = StreamQuality.BALANCED, + val postProcessMode: PostProcessMode = PostProcessMode.OCRPDF, + val autoDownloadProcessedResult: Boolean = false, + val streamFrameRate: StreamFrameRate = StreamFrameRate.FPS_10, ) class SettingsViewModel(container: AppContainer) : ViewModel() { @@ -47,13 +56,29 @@ class SettingsViewModel(container: AppContainer) : ViewModel() { dirName, repo.exportFormat, repo.exportQuality, - ) { colorMode, uri, name, format, quality -> + repo.serverHost, + repo.serverPort, + repo.serverDisplayName, + repo.lastSelectedServiceId, + repo.streamQuality, + repo.postProcessMode, + repo.autoDownloadProcessedResult, + repo.streamFrameRate, + ) { values: Array -> SettingsUiState( - defaultColorMode = colorMode, - exportDirUri = uri, - exportDirName = name, - exportFormat = format, - exportQuality = quality, + defaultColorMode = values[0] as DefaultColorMode, + exportDirUri = values[1] as String?, + exportDirName = values[2] as String?, + exportFormat = values[3] as ExportFormat, + exportQuality = values[4] as ExportQuality, + serverHost = values[5] as String?, + serverPort = values[6] as Int, + serverDisplayName = values[7] as String?, + lastSelectedServiceId = values[8] as String?, + streamQuality = values[9] as StreamQuality, + postProcessMode = values[10] as PostProcessMode, + autoDownloadProcessedResult = values[11] as Boolean, + streamFrameRate = values[12] as StreamFrameRate, ) }.stateIn( viewModelScope, @@ -92,4 +117,52 @@ class SettingsViewModel(container: AppContainer) : ViewModel() { _dirName.value = uri?.let { repo.resolveExportDirName(it) } } } + + fun setServerHost(host: String?) { + viewModelScope.launch { + repo.setServerHost(host) + } + } + + fun setServerPort(port: Int) { + viewModelScope.launch { + repo.setServerPort(port) + } + } + + fun setServerDisplayName(name: String?) { + viewModelScope.launch { + repo.setServerDisplayName(name) + } + } + + fun setLastSelectedServiceId(id: String?) 
{ + viewModelScope.launch { + repo.setLastSelectedServiceId(id) + } + } + + fun setStreamQuality(quality: StreamQuality) { + viewModelScope.launch { + repo.setStreamQuality(quality) + } + } + + fun setPostProcessMode(mode: PostProcessMode) { + viewModelScope.launch { + repo.setPostProcessMode(mode) + } + } + + fun setAutoDownloadProcessedResult(enabled: Boolean) { + viewModelScope.launch { + repo.setAutoDownloadProcessedResult(enabled) + } + } + + fun setStreamFrameRate(rate: StreamFrameRate) { + viewModelScope.launch { + repo.setStreamFrameRate(rate) + } + } } diff --git a/app/src/main/res/drawable/icon.png b/app/src/main/res/drawable/icon.png index 842543e..97316bd 100644 Binary files a/app/src/main/res/drawable/icon.png and b/app/src/main/res/drawable/icon.png differ diff --git a/app/src/main/res/mipmap-anydpi/ic_launcher.xml b/app/src/main/res/mipmap-anydpi/ic_launcher.xml index 1413a31..45b840d 100644 --- a/app/src/main/res/mipmap-anydpi/ic_launcher.xml +++ b/app/src/main/res/mipmap-anydpi/ic_launcher.xml @@ -1,6 +1,6 @@ - - + + \ No newline at end of file diff --git a/app/src/main/res/mipmap-anydpi/ic_launcher_round.xml b/app/src/main/res/mipmap-anydpi/ic_launcher_round.xml index 1413a31..45b840d 100644 --- a/app/src/main/res/mipmap-anydpi/ic_launcher_round.xml +++ b/app/src/main/res/mipmap-anydpi/ic_launcher_round.xml @@ -1,6 +1,6 @@ - - + + \ No newline at end of file diff --git a/app/src/main/res/mipmap-hdpi/ic_launcher.png b/app/src/main/res/mipmap-hdpi/ic_launcher.png new file mode 100644 index 0000000..085c1e3 Binary files /dev/null and b/app/src/main/res/mipmap-hdpi/ic_launcher.png differ diff --git a/app/src/main/res/mipmap-hdpi/ic_launcher.webp b/app/src/main/res/mipmap-hdpi/ic_launcher.webp deleted file mode 100644 index 1961efa..0000000 Binary files a/app/src/main/res/mipmap-hdpi/ic_launcher.webp and /dev/null differ diff --git a/app/src/main/res/mipmap-hdpi/ic_launcher_round.png b/app/src/main/res/mipmap-hdpi/ic_launcher_round.png new file mode 
100644 index 0000000..085c1e3 Binary files /dev/null and b/app/src/main/res/mipmap-hdpi/ic_launcher_round.png differ diff --git a/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp b/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp deleted file mode 100644 index 7738756..0000000 Binary files a/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp and /dev/null differ diff --git a/app/src/main/res/mipmap-mdpi/ic_launcher.png b/app/src/main/res/mipmap-mdpi/ic_launcher.png new file mode 100644 index 0000000..1e176be Binary files /dev/null and b/app/src/main/res/mipmap-mdpi/ic_launcher.png differ diff --git a/app/src/main/res/mipmap-mdpi/ic_launcher.webp b/app/src/main/res/mipmap-mdpi/ic_launcher.webp deleted file mode 100644 index b7fa195..0000000 Binary files a/app/src/main/res/mipmap-mdpi/ic_launcher.webp and /dev/null differ diff --git a/app/src/main/res/mipmap-mdpi/ic_launcher_round.png b/app/src/main/res/mipmap-mdpi/ic_launcher_round.png new file mode 100644 index 0000000..1e176be Binary files /dev/null and b/app/src/main/res/mipmap-mdpi/ic_launcher_round.png differ diff --git a/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp b/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp deleted file mode 100644 index 8bb2d50..0000000 Binary files a/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp and /dev/null differ diff --git a/app/src/main/res/mipmap-xhdpi/ic_launcher.png b/app/src/main/res/mipmap-xhdpi/ic_launcher.png new file mode 100644 index 0000000..e8df1c6 Binary files /dev/null and b/app/src/main/res/mipmap-xhdpi/ic_launcher.png differ diff --git a/app/src/main/res/mipmap-xhdpi/ic_launcher.webp b/app/src/main/res/mipmap-xhdpi/ic_launcher.webp deleted file mode 100644 index c42de14..0000000 Binary files a/app/src/main/res/mipmap-xhdpi/ic_launcher.webp and /dev/null differ diff --git a/app/src/main/res/mipmap-xhdpi/ic_launcher_round.png b/app/src/main/res/mipmap-xhdpi/ic_launcher_round.png new file mode 100644 index 0000000..e8df1c6 Binary files 
/dev/null and b/app/src/main/res/mipmap-xhdpi/ic_launcher_round.png differ diff --git a/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp b/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp deleted file mode 100644 index 17cae6d..0000000 Binary files a/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp and /dev/null differ diff --git a/app/src/main/res/mipmap-xxhdpi/ic_launcher.png b/app/src/main/res/mipmap-xxhdpi/ic_launcher.png new file mode 100644 index 0000000..dfaa352 Binary files /dev/null and b/app/src/main/res/mipmap-xxhdpi/ic_launcher.png differ diff --git a/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp b/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp deleted file mode 100644 index dc21c1e..0000000 Binary files a/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp and /dev/null differ diff --git a/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.png b/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.png new file mode 100644 index 0000000..dfaa352 Binary files /dev/null and b/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.png differ diff --git a/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp b/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp deleted file mode 100644 index efc7ae4..0000000 Binary files a/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp and /dev/null differ diff --git a/app/src/main/res/mipmap-xxxhdpi/ic_launcher.png b/app/src/main/res/mipmap-xxxhdpi/ic_launcher.png new file mode 100644 index 0000000..6dc4dce Binary files /dev/null and b/app/src/main/res/mipmap-xxxhdpi/ic_launcher.png differ diff --git a/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp b/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp deleted file mode 100644 index f81c04f..0000000 Binary files a/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp and /dev/null differ diff --git a/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.png b/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.png new file mode 100644 index 0000000..6dc4dce Binary files 
/dev/null and b/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.png differ diff --git a/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp b/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp deleted file mode 100644 index a1bec3a..0000000 Binary files a/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp and /dev/null differ diff --git a/app/src/main/res/values/ic_launcher_background.xml b/app/src/main/res/values/ic_launcher_background.xml index 9aafe68..c5d5899 100644 --- a/app/src/main/res/values/ic_launcher_background.xml +++ b/app/src/main/res/values/ic_launcher_background.xml @@ -1,4 +1,4 @@ - #0CAD55 + #FFFFFF \ No newline at end of file diff --git a/app/src/main/res/values/strings.xml b/app/src/main/res/values/strings.xml index 7685a25..1e0793a 100644 --- a/app/src/main/res/values/strings.xml +++ b/app/src/main/res/values/strings.xml @@ -63,6 +63,9 @@ Settings Scan Export + Network Collaboration + Stream Quality + Post Process Mode Share Share document Cannot save file: permission was denied diff --git a/app/src/main/res/xml/network_security_config.xml b/app/src/main/res/xml/network_security_config.xml new file mode 100644 index 0000000..2439f15 --- /dev/null +++ b/app/src/main/res/xml/network_security_config.xml @@ -0,0 +1,4 @@ + + + + diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 3a27a38..6f7cdc9 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -23,6 +23,7 @@ kotlinSerialization = "1.10.0" reorderable = "3.0.0" jetbrainsKotlinJvm = "2.3.10" coroutines-test = "1.10.2" +okhttp = "4.12.0" [libraries] androidx-core-ktx = { group = "androidx.core", name = "core-ktx", version.ref = "coreKtx" } @@ -63,6 +64,7 @@ kotlinx-coroutines-test = { group = "org.jetbrains.kotlinx", name = "kotlinx-cor assertj = { group="org.assertj", name="assertj-core", version.ref = "assertj" } +okhttp = { group = "com.squareup.okhttp3", name = "okhttp", version.ref = "okhttp" } [plugins] android-application = { id = 
"com.android.application", version.ref = "agp" } diff --git a/pc-server/README.md b/pc-server/README.md new file mode 100644 index 0000000..f095aa7 --- /dev/null +++ b/pc-server/README.md @@ -0,0 +1,15 @@ +# PC Server for FairScan real-time camera streaming +# +# This is a minimal test server that: +# - Receives WebSocket frames from the Android app +# - Displays them in a browser +# - Provides GET /health for connection testing + +## Quick Start + +```bash +pip install -r requirements.txt +python main.py +``` + +Open http://localhost:2026 in a browser to see the stream. diff --git a/pc-server/main.py b/pc-server/main.py new file mode 100644 index 0000000..60ae0e2 --- /dev/null +++ b/pc-server/main.py @@ -0,0 +1,800 @@ +""" +FairScan PC Server — Streaming, PDF upload & real MinerU task processing. + +Endpoints: + Streaming: + GET /health → Health check (used by Android for connection test) + WS /stream → WebSocket endpoint for receiving JPEG frames + GET / → Web page showing the live stream + + Upload & Tasks: + POST /upload/pdf → Upload a PDF file, returns fileId + POST /tasks/process → Create a MinerU processing task (ocrpdf / markdown) + GET /tasks/{taskId} → Query task status (queued/processing/completed/failed) + GET /tasks/{taskId}/artifacts → List result files for a completed task + GET /artifacts/{artifactId}/download → Download a result file + GET /files/{fileId}/download → Download an uploaded file +""" + +import asyncio +import json +import os +import time +import uuid +import zipfile +from datetime import datetime +from pathlib import Path + +# 国内网络环境无法访问 huggingface.co,强制使用本地缓存模型 +os.environ["HF_HUB_OFFLINE"] = "1" +# Tesseract OCR 语言包路径(OCRmyPDF 需要,从 conda 环境自动获取) +_tessdata = Path(os.environ.get("CONDA_PREFIX", "")) / "Library" / "share" / "tessdata" +if _tessdata.exists(): + os.environ["TESSDATA_PREFIX"] = str(_tessdata) + +from fastapi import FastAPI, File, Form, HTTPException, UploadFile, WebSocket, WebSocketDisconnect +from fastapi.responses 
import FileResponse, HTMLResponse, JSONResponse + +# ── MinerU & OCRmyPDF integration ──────────────────────────────────────────── + +from mineru.cli.common import aio_do_parse, read_fn +import ocrmypdf +from loguru import logger + +app = FastAPI(title="FairScan PC Server") + +# ── Configuration ───────────────────────────────────────────────────────────── + +UPLOAD_DIR = Path("./uploads") +TASKS_DIR = Path("./tasks") +UPLOAD_DIR.mkdir(exist_ok=True) +TASKS_DIR.mkdir(exist_ok=True) + + +# ── In-memory state (streaming) ────────────────────────────────────────────── + +latest_frame: bytes | None = None +frame_timestamp: float = 0.0 +connected_clients: set[WebSocket] = set() +stream_stats: dict = {"frames_received": 0, "bytes_received": 0, "started_at": None} + + +# ── HTML page with live stream viewer ──────────────────────────────────────── + +STREAM_PAGE = """\ + + + + + FairScan Stream + + + + + +
Waiting for stream...
+
Not connected
+ + + +""" + + +# ── Routes: Streaming ──────────────────────────────────────────────────────── + +@app.get("/health") +async def health(): + """Health check endpoint used by Android for connection testing.""" + return JSONResponse({ + "status": "ok", + "name": "FairScan-PC", + "features": ["stream", "upload", "tasks"], + "streamStats": { + "framesReceived": stream_stats["frames_received"], + "uptime": ( + time.time() - stream_stats["started_at"] + if stream_stats["started_at"] else 0 + ), + }, + "timestamp": datetime.utcnow().isoformat(), + }) + + +@app.get("/") +async def index(): + """Serve the live stream viewer page.""" + return HTMLResponse(STREAM_PAGE) + + +@app.websocket("/stream") +async def stream_endpoint(ws: WebSocket): + """WebSocket endpoint that receives JPEG frames from the Android app.""" + await ws.accept() + connected_clients.add(ws) + if stream_stats["started_at"] is None: + stream_stats["started_at"] = time.time() + + try: + frame_count = 0 + while True: + data = await ws.receive_bytes() + global latest_frame, frame_timestamp + latest_frame = data + frame_timestamp = time.time() + stream_stats["frames_received"] += 1 + stream_stats["bytes_received"] += len(data) + frame_count += 1 + if frame_count % 30 == 1: + print(f"[Stream] Received frame #{stream_stats['frames_received']} ({len(data)} bytes)") + # Broadcast to all browser clients + for client in connected_clients: + if client is not ws: + try: + await client.send_bytes(data) + except Exception: + connected_clients.discard(client) + except WebSocketDisconnect: + pass + finally: + connected_clients.discard(ws) + + +# ── Routes: Upload & Tasks ────────────────────────────────────────────────── + +files_db: dict[str, dict] = {} # fileId -> {fileId, fileName, sizeBytes, uploadPath, createdAt} + + +@app.post("/upload/pdf", status_code=201) +async def upload_pdf(file: UploadFile = File(...)): + """Upload a PDF file to the PC (no processing). 
+ + Stores the file in ./uploads/ and returns a fileId for later use. + Processing is a separate step via POST /tasks/process. + """ + if not file.filename or not file.filename.lower().endswith(".pdf"): + raise HTTPException(status_code=400, detail="Only PDF files are accepted") + + file_id = str(uuid.uuid4()) + timestamp = datetime.utcnow().isoformat() + safe_name = file.filename.replace("..", "").replace("/", "_") + + # Save the uploaded PDF + upload_path = UPLOAD_DIR / f"{file_id}_{safe_name}" + content = await file.read() + upload_path.write_bytes(content) + + # Store file record (pure upload, no task/processing) + file_record = { + "fileId": file_id, + "fileName": safe_name, + "mimeType": "application/pdf", + "sizeBytes": len(content), + "uploadPath": str(upload_path), + "createdAt": timestamp, + } + files_db[file_id] = file_record + + print(f"[Upload] Received {safe_name} ({len(content)} bytes) -> file {file_id}") + return JSONResponse({ + "fileId": file_id, + "fileName": safe_name, + "mimeType": "application/pdf", + "sizeBytes": len(content), + }) + + +@app.post("/tasks/process", status_code=202) +async def create_task(body: dict): + """Create a processing task for an uploaded PDF. 
+ + Request body: {"fileId": "...", "processType": "ocrpdf"|"markdown"} + """ + file_id = body.get("fileId", "") + process_type = body.get("processType", "ocrpdf").lower() + + if not file_id: + raise HTTPException(status_code=400, detail="fileId is required") + if process_type not in ("ocrpdf", "markdown"): + raise HTTPException(status_code=400, detail="processType must be 'ocrpdf' or 'markdown'") + + # Look up the uploaded file + file_record = files_db.get(file_id) + if file_record is None: + raise HTTPException(status_code=404, detail="File not found") + + task_id = str(uuid.uuid4()) + timestamp = datetime.utcnow().isoformat() + + task = { + "taskId": task_id, + "fileId": file_id, + "status": "queued", + "progress": 0, + "processType": process_type, + "fileName": file_record["fileName"], + "createdAt": timestamp, + "updatedAt": timestamp, + "uploadPath": file_record["uploadPath"], + "message": f"Task created (processType={process_type})", + } + tasks_db[task_id] = task + + # Start MinerU processing in background + asyncio.create_task(process_with_mineru(task_id)) + + print(f"[Tasks] Created task {task_id} for file {file_id} (processType={process_type})") + return JSONResponse({ + "taskId": task_id, + "status": "queued", + "processType": process_type, + "fileId": file_id, + }) + + +@app.get("/tasks/{task_id}") +async def get_task_status(task_id: str): + """Get the current status of a processing task.""" + task = tasks_db.get(task_id) + if task is None: + raise HTTPException(status_code=404, detail="Task not found") + + return JSONResponse({ + "taskId": task["taskId"], + "fileId": task.get("fileId", ""), + "status": task["status"], + "progress": task["progress"], + "processType": task.get("processType", ""), + "fileName": task["fileName"], + "createdAt": task["createdAt"], + "message": task.get("message", ""), + }) + + +@app.get("/tasks/{task_id}/artifacts") +async def list_artifacts(task_id: str): + """List result files for a completed task.""" + task = 
tasks_db.get(task_id) + if task is None: + raise HTTPException(status_code=404, detail="Task not found") + + artifacts = artifacts_db.get(task_id, []) + result = [] + for art in artifacts: + result.append({ + "id": art["artifactId"], + "artifactId": art["artifactId"], + "fileName": art["fileName"], + "fileSize": art["fileSize"], + "fileType": art["fileType"], + }) + return JSONResponse(result) + + +@app.get("/artifacts/{artifact_id}/download") +async def download_artifact(artifact_id: str): + """Download a processed artifact file.""" + art = artifacts_map.get(artifact_id) + if art is None: + raise HTTPException(status_code=404, detail="Artifact not found") + + file_path = Path(art["filePath"]) + if not file_path.exists(): + raise HTTPException(status_code=404, detail="Artifact file not found on disk") + + file_type = art["fileType"] + if file_type == "pdf": + media_type = "application/pdf" + elif file_type == "zip": + media_type = "application/zip" + else: + media_type = "text/markdown" + return FileResponse( + path=file_path, + filename=art["fileName"], + media_type=media_type, + ) + + +@app.get("/files/{file_id}/download") +async def download_uploaded_file(file_id: str): + """Download an uploaded (unprocessed) PDF file.""" + file_record = files_db.get(file_id) + if file_record is None: + raise HTTPException(status_code=404, detail="File not found") + + file_path = Path(file_record["uploadPath"]) + if not file_path.exists(): + raise HTTPException(status_code=404, detail="File not found on disk") + + return FileResponse( + path=file_path, + filename=file_record["fileName"], + media_type="application/pdf", + ) + + +# ── Dashboard page ─────────────────────────────────────────────────────────── + +DASHBOARD_PAGE = """\ + + + + + FairScan Dashboard + + + +
+

📊 FairScan Dashboard

+ +
+ +
+ +
📄 已上传的文件
+ + + + + +
文件名文件 ID大小时间操作
+ +
⚙️ 处理任务
+ + + + + +
文件名任务 ID状态进度处理类型时间操作
+ + + + +""" + + +@app.get("/dashboard") +async def dashboard(): + """Serve the task management dashboard page.""" + return HTMLResponse(DASHBOARD_PAGE) + + +@app.get("/api/dashboard") +async def dashboard_api(): + """JSON endpoint providing dashboard data (files + tasks + stats).""" + # List uploaded files + files_list = [] + for fid, f_rec in files_db.items(): + files_list.append({ + "fileId": fid, + "fileName": f_rec.get("fileName", ""), + "sizeBytes": f_rec.get("sizeBytes", 0), + "createdAt": f_rec.get("createdAt", ""), + }) + files_list.sort(key=lambda f: f.get("createdAt", ""), reverse=True) + + # List tasks + tasks_list = [] + for tid, task in tasks_db.items(): + task_artifacts = artifacts_db.get(tid, []) + artifacts_info = [ + {"id": a["artifactId"], "fileName": a["fileName"]} + for a in task_artifacts + ] + tasks_list.append({ + "taskId": tid, + "fileId": task.get("fileId", ""), + "fileName": task.get("fileName", ""), + "status": task["status"], + "progress": task["progress"], + "processType": task.get("processType", ""), + "createdAt": task.get("createdAt", ""), + "message": task.get("message", ""), + "artifacts": artifacts_info, + }) + tasks_list.sort(key=lambda t: t.get("createdAt", ""), reverse=True) + + total = len(tasks_list) + queued = sum(1 for t in tasks_list if t["status"] == "queued") + processing = sum(1 for t in tasks_list if t["status"] == "processing") + completed = sum(1 for t in tasks_list if t["status"] == "completed") + failed = sum(1 for t in tasks_list if t["status"] == "failed") + + return JSONResponse({ + "stats": {"total": total, "queued": queued, "processing": processing, "completed": completed, "failed": failed}, + "files": files_list, + "tasks": tasks_list, + }) + + +# ── In-memory databases ────────────────────────────────────────────────────── + +tasks_db: dict[str, dict] = {} +artifacts_db: dict[str, list[dict]] = {} +artifacts_map: dict[str, dict] = {} + + +async def process_with_mineru(task_id: str): + """Process a PDF using 
real MinerU pipeline (replaces simulate_processing).""" + task = tasks_db.get(task_id) + if task is None: + return + + process_type = task.get("processType", "ocrpdf") + upload_path_src = task.get("uploadPath") + file_name = task.get("fileName", "document.pdf") + base_name = Path(file_name).stem + lang = task.get("options", {}).get("lang", "ch") + + if not upload_path_src or not Path(upload_path_src).exists(): + task["status"] = "failed" + task["message"] = "Uploaded file not found on disk" + logger.error(f"[MinerU] Task {task_id}: file not found at {upload_path_src}") + return + + task["status"] = "processing" + task["progress"] = 15 + task["updatedAt"] = datetime.utcnow().isoformat() + task["message"] = f"MinerU pipeline started (backend=pipeline, processType={process_type})" + logger.info(f"[MinerU] Task {task_id}: starting {process_type} on {file_name}") + + # Prepare output directory + output_dir = TASKS_DIR / task_id + output_dir.mkdir(parents=True, exist_ok=True) + + try: + pdf_bytes = read_fn(upload_path_src) + + if process_type == "markdown": + await aio_do_parse( + output_dir=str(output_dir), + pdf_file_names=[base_name], + pdf_bytes_list=[pdf_bytes], + p_lang_list=[lang], + backend="pipeline", + parse_method="auto", + f_dump_md=True, + f_dump_middle_json=False, + f_dump_model_output=False, + f_dump_orig_pdf=False, + f_dump_content_list=False, + f_draw_layout_bbox=False, + f_draw_span_bbox=False, + ) + # MinerU output: {output_dir}/{base_name}/auto/{base_name}.md + md_dir = output_dir / base_name / "auto" + md_path = md_dir / f"{base_name}.md" + images_dir = md_dir / "images" + + if md_path.exists(): + artifacts_list = [] + + # Register the .md artifact + md_art_id = str(uuid.uuid4()) + md_artifact = { + "artifactId": md_art_id, + "fileName": f"{base_name}.md", + "fileSize": md_path.stat().st_size, + "fileType": "md", + "filePath": str(md_path), + } + artifacts_list.append(md_artifact) + artifacts_map[md_art_id] = md_artifact + + # If images directory 
exists and has files, create a ZIP + if images_dir.exists() and any(images_dir.iterdir()): + zip_path = md_dir / f"{base_name}_result.zip" + with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf: + zf.write(md_path, md_path.name) + for img_file in images_dir.rglob("*"): + if img_file.is_file(): + arcname = f"images/{img_file.relative_to(images_dir)}" + zf.write(img_file, arcname) + zip_art_id = str(uuid.uuid4()) + zip_artifact = { + "artifactId": zip_art_id, + "fileName": f"{base_name}_result.zip", + "fileSize": zip_path.stat().st_size, + "fileType": "zip", + "filePath": str(zip_path), + } + artifacts_list.append(zip_artifact) + artifacts_map[zip_art_id] = zip_artifact + logger.info(f"[MinerU] Task {task_id}: ZIP created -> {zip_path} ({zip_path.stat().st_size} bytes)") + + artifacts_db[task_id] = artifacts_list + task["status"] = "completed" + task["progress"] = 100 + task["message"] = f"MinerU Markdown completed ({md_path.stat().st_size} bytes)" + logger.info(f"[MinerU] Task {task_id}: markdown completed -> {md_path}") + else: + task["status"] = "failed" + task["message"] = "MinerU did not produce .md output" + logger.error(f"[MinerU] Task {task_id}: no .md output at {md_path}") + + else: # ocrpdf — use OCRmyPDF for searchable dual-layer PDF + ocr_lang = {"ch": "chi_sim", "en": "eng", "japan": "jpn", "korean": "kor"}.get(lang, "chi_sim") + ocr_output = output_dir / f"{base_name}_ocr.pdf" + + await asyncio.to_thread( + ocrmypdf.ocr, + upload_path_src, + str(ocr_output), + language=ocr_lang, + output_type="pdf", + skip_text=True, + deskew=True, + optimize=0, # skip JBIG2 optimization (pikepdf compat) + ) + + if ocr_output.exists(): + art_id = str(uuid.uuid4()) + artifacts_db[task_id] = [{ + "artifactId": art_id, + "fileName": f"{base_name}_ocr.pdf", + "fileSize": ocr_output.stat().st_size, + "fileType": "pdf", + "filePath": str(ocr_output), + }] + artifacts_map[art_id] = artifacts_db[task_id][0] + task["status"] = "completed" + task["progress"] = 100 + 
task["message"] = f"OCRmyPDF completed ({ocr_output.stat().st_size} bytes)" + logger.info(f"[OCRmyPDF] Task {task_id}: ocrpdf completed -> {ocr_output}") + else: + task["status"] = "failed" + task["message"] = "OCRmyPDF did not produce output" + + except Exception as e: + task["status"] = "failed" + task["message"] = f"MinerU error: {e}" + task["progress"] = 0 + logger.error(f"[MinerU] Task {task_id}: exception - {e}") + + task["updatedAt"] = datetime.utcnow().isoformat() + + +# ── Entry point ────────────────────────────────────────────────────────────── + +if __name__ == "__main__": + import uvicorn + port = 2026 + print(f"🚀 FairScan PC Server starting on http://0.0.0.0:{port}") + print(f" Stream: http://localhost:{port}") + print(f" Dashboard: http://localhost:{port}/dashboard") + print(f" Health: http://localhost:{port}/health") + print(f" Upload: POST http://localhost:{port}/upload/pdf") + print(f" Tasks: POST http://localhost:{port}/tasks/process") + uvicorn.run(app, host="0.0.0.0", port=port, log_level="info") diff --git a/pc-server/requirements.txt b/pc-server/requirements.txt new file mode 100644 index 0000000..22cd8c5 --- /dev/null +++ b/pc-server/requirements.txt @@ -0,0 +1,4 @@ +fastapi>=0.104.0 +uvicorn[standard]>=0.24.0 +websockets>=12.0 +Pillow>=10.0.0 diff --git a/requirements/FIXES_SUMMARY.md b/requirements/FIXES_SUMMARY.md new file mode 100644 index 0000000..a57ade6 --- /dev/null +++ b/requirements/FIXES_SUMMARY.md @@ -0,0 +1,195 @@ +# UI 扩展修复总结 + +## 修改文件清单 + +### 1. 
SettingsRepository.kt +**变更**:添加了网络协作相关的配置项和枚举类型 + +**新增内容**: +- 7个新的 `stringPreferencesKey`: + - `SERVER_HOST` - PC主机地址 + - `SERVER_PORT` - PC端口 + - `SERVER_DISPLAY_NAME` - PC显示名称 + - `LAST_SELECTED_SERVICE_ID` - 上次选择的服务ID + - `STREAM_QUALITY` - 图传质量 + - `POST_PROCESS_MODE` - 后处理模式 + - `AUTO_DOWNLOAD_PROCESSED_RESULT` - 自动下载处理结果开关 + +- 7个新的 Flow 属性用于读取这些配置 + +- 7个新的 suspend fun setter 方法: + - `setServerHost()` + - `setServerPort()` + - `setServerDisplayName()` + - `setLastSelectedServiceId()` + - `setStreamQuality()` + - `setPostProcessMode()` + - `setAutoDownloadProcessedResult()` + +- 2个新的 enum 类型: + - `StreamQuality(LOW, BALANCED, HIGH)` - 图传质量档位 + - `PostProcessMode(MARKDOWN, OCRPDF)` - 后处理模式 + +**修复**:修复了第231行缺少类闭合括号的问题 + +--- + +### 2. SettingsViewModel.kt +**变更**:扩展了 UI 状态数据类和 combine flow + +**新增内容**: +- 扩展 `SettingsUiState` 数据类,添加了7个新字段: + - `serverHost: String?` + - `serverPort: Int` + - `serverDisplayName: String?` + - `lastSelectedServiceId: String?` + - `streamQuality: StreamQuality` + - `postProcessMode: PostProcessMode` + - `autoDownloadProcessedResult: Boolean` + +- 7个新的 ViewModel 方法与 Repository 的 setter 对接: + - `setServerHost()` + - `setServerPort()` + - `setServerDisplayName()` + - `setLastSelectedServiceId()` + - `setStreamQuality()` + - `setPostProcessMode()` + - `setAutoDownloadProcessedResult()` + +**修复**: +- 使用 `Array` 方式重写了 `combine()` 的 lambda,解决了12个参数类型推断失败的问题 +- 使用数组索引方式访问组合流的值,避免了 lambda 参数过多导致的编译错误 + +--- + +### 3. 
SettingsScreen.kt +**变更**:添加了网络协作 UI 界面 + +**新增内容**: +- 7个新的 lambda 参数到 `SettingsScreen()` 函数: + - `onServerHostChanged` + - `onServerPortChanged` + - `onStreamQualityChanged` + - `onPostProcessModeChanged` + - `onAutoDownloadChanged` + - `onScanNetworkHostsClick` + - `onTestConnectionClick` + +- 新增 "Network Collaboration" 部分 UI,包括: + - PC 服务器配置(主机地址和端口输入框) + - 当前连接状态显示 + - "扫描主机" 和 "测试连接" 按钮 + - 图传质量选择(低/均衡/高三档) + - 后处理模式选择(Markdown/OCR PDF) + - 自动下载处理结果开关 + +**修复**: +- 第309行:添加了缺失的 `SettingsContent` 函数结束的闭合括号 `}` +- 第230行:移除了 `keyboardType = KeyboardType.Number` 参数,改用基础的 `OutlinedTextField`,避免版本兼容性问题 + +--- + +### 4. strings.xml +**变更**:添加了新的本地化字符串资源 + +**新增内容**: +- `settings_section_network` - "Network Collaboration" 标题 +- `stream_quality` - "Stream Quality" 选项标题 +- `post_process_mode` - "Post Process Mode" 选项标题 + +这些资源用于 UI 显示,遵循现有的资源命名规范。 + +--- + +### 5. MainActivity.kt +**变更**:更新了 `SettingsScreen` 的调用 + +**新增内容**: +- 7个新的回调参数传递到 `SettingsScreen()`: + - `onServerHostChanged = { host -> settingsViewModel.setServerHost(host) }` + - `onServerPortChanged = { port -> settingsViewModel.setServerPort(port) }` + - `onStreamQualityChanged = { quality -> settingsViewModel.setStreamQuality(quality) }` + - `onPostProcessModeChanged = { mode -> settingsViewModel.setPostProcessMode(mode) }` + - `onAutoDownloadChanged = { enabled -> settingsViewModel.setAutoDownloadProcessedResult(enabled) }` + - `onScanNetworkHostsClick = { /* TODO */ }` + - `onTestConnectionClick = { /* TODO */ }` + +--- + +## 编译错误修复 + +### 原始错误 +1. **SettingsRepository.kt:231** - 缺少类闭合括号 +2. **SettingsScreen.kt:309** - 缺少函数结束括号 +3. **SettingsScreen.kt:230** - OutlinedTextField 的 keyboardType 参数不兼容 +4. **SettingsViewModel.kt:65** - combine 的 lambda 参数类型推断失败(12个参数过多) +5. **MainActivity.kt:283** - SettingsScreen 调用缺少新参数 + +### 修复方案 +1. 添加了缺失的闭合括号 +2. 使用 Array 方式重写 combine 的 lambda 参数,解决类型推断问题 +3. 移除了不兼容的 OutlinedTextField 参数 +4. 
完整更新了所有调用点的参数传递 + +--- + +## 后续待办项目 + +这些是实现计划中的下一步任务: + +### P0:局域网发现与基础连接 +- Task P0-2:实现局域网发现基础能力(NSD) +- Task P0-3:补充网络基础设施(HTTP 客户端) + +### P1:实时图传 +- Task P1-2:实现帧压缩与抽帧策略 +- Task P1-3:相机页接入图传控制 + +### P2:手机本地 PDF 上传 +- Task P2-1:实现 PDF 上传客户端 + +### P3:统一处理任务与结果下载 +- Task P3-1:实现统一任务接口客户端 + +### P4:体验优化 +- Task P4-1:发现结果去重与缓存 + +--- + +## 验证步骤 + +1. **编译验证**: + ```bash + ./gradlew clean build + ``` + +2. **单元测试**(如果有): + ```bash + ./gradlew testDebugUnitTest + ``` + +3. **运行应用**: + - 打开应用 + - 进入设置页面 + - 验证新的"Network Collaboration"部分能正常显示 + - 验证所有输入框和按钮响应正常 + +--- + +## 技术细节 + +### 为什么使用 Array 方式处理 combine? +kotlinx.coroutines 的 `combine` 只为 2~5 个 Flow 提供带独立类型参数的重载;超过 5 个 Flow 时只能使用 vararg/Iterable 重载,其 transform lambda 接收的是一个 `Array`,无法再为每个流单独声明并推断 lambda 参数类型。这里需要组合 12 个流,因此改为通过数组索引读取各个流的值(必要时做类型转换),在规避这个限制的同时保持代码的可读性。 + +### 为什么移除了 keyboardType? +`OutlinedTextField` 没有直接的 `keyboardType` 参数,键盘类型需要通过 `keyboardOptions = KeyboardOptions(keyboardType = KeyboardType.Number)` 指定,直接传入 `keyboardType` 会导致编译错误。此处选择去掉该设置、使用基础的 `OutlinedTextField` 调用;如需数字键盘,可按上述 `keyboardOptions` 写法重新加入。 + +--- + +## 文件修改统计 + +- 修改文件数:5 个 +- 新增代码行数:约 150 行 +- 修复编译错误:5 处 +- 新增功能点:20+ 个(包括新的参数、方法、UI 元素) + diff --git a/requirements/FairScan_reqirement.prg b/requirements/FairScan_reqirement.prg new file mode 100644 index 0000000..dc5ee95 Binary files /dev/null and b/requirements/FairScan_reqirement.prg differ diff --git a/requirements/IMPLEMENTATION_COMPLETE.md b/requirements/IMPLEMENTATION_COMPLETE.md new file mode 100644 index 0000000..6c0c90a --- /dev/null +++ b/requirements/IMPLEMENTATION_COMPLETE.md @@ -0,0 +1,293 @@ +# 实现完成报告(完整版 v3) + +## 执行概览 + +✅ **状态**:Streaming (P1) + Upload/Task Pipeline (P2/P3) + MinerU 真实接入 + 任务管理面板 + Markdown ZIP 打包 已完成 + +**最近更新**:2026-06-04 +**范围**:MinerU 真实 markdown 处理、任务管理面板(手机端)、ZIP 打包下载、HF_HUB_OFFLINE 离线模式 + +--- + +## 已完成的工作 + +### P1:实时网络图传 ✅ + +#### 网络基础设施 +| 文件 | 说明 | +|------|------| +| `network/ServerEndpoint.kt` | 服务端点模型(host/port/url/wsUrl) | +| `network/NetworkInfoProvider.kt` | 本地 IP 获取 | +| `network/stream/StreamState.kt` | 图传状态模型(Disconnected/Connecting/Connected/Error) | +| `network/stream/StreamQualityPreset.kt` | 质量预设 ↔ StreamQuality 映射 | +| 
`network/stream/FrameCompressor.kt` | JPEG 压缩 + 缩放 | +| `network/stream/FrameDropController.kt` | 丢帧控制(AtomicBoolean + 时间间隔) | +| `network/stream/OkHttpStreamClient.kt` | WebSocket 图传客户端(包含 StreamClient 接口) | + +#### 相机页集成 +- `CameraViewModel`:添加 streamState、streamTargetHost、toggleStreaming()、sendStreamFrame() +- `liveAnalysis()` 中嵌入图传帧发送(fire-and-forget,不影响 ML 分析) +- `CameraScreen`:添加 StreamToggleButton(Cast 图标 + 状态颜色 + 主机显示) +- 图传断连不影响正式扫描 + +#### PC 服务器 +- `pc-server/main.py`:FastAPI 服务,含 `/health`、`WS /stream`、Web 预览页面 +- 支持帧广播:接收手机帧并转发给浏览器客户端 +- 帧率统计和日志 + +#### 帧率控制 +- 添加 `StreamFrameRate` 枚举(UNLIMITED / FPS_15 / FPS_10 / FPS_5) +- 设置页 RadioButton 选择 +- 无限制模式:minIntervalMs <= 0,仅以 isSending 状态控制 + +--- + +### P2/P3:PDF 上传与任务处理流水线 ✅ + +#### 上传与处理分离(最新重构) + +遵循 `pc-api-spec.md` 接口规范,将上传和处理解耦为独立步骤: + +**上传(纯传输)**: +- `POST /upload/pdf` → 返回 `fileId`(201 Created) +- 仅保存 PDF 到 `./uploads/`,不触发任何处理 +- PC 服务端使用独立 `files_db` 字典存储文件记录 + +**处理(任务创建)**: +- `POST /tasks/process` → 基于 `fileId` + `processType` 创建任务(202 Accepted) +- `processType` 可选值:`ocrpdf`、`markdown` +- 异步模拟处理:queued → processing(10%→50%→90%) → completed +- OCR PDF 模式:复制原始 PDF 作为处理结果(修复了之前空白 PDF 的问题) +- Markdown 模式:生成示例 `.md` 文件 + +#### Android 端网络客户端 + +| 文件 | 说明 | +|------|------| +| `network/upload/PdfUploadClient.kt` | HTTP multipart POST 上传 PDF,返回 `(fileId, fileName, sizeBytes)` | +| `network/tasks/TaskModels.kt` | 任务数据模型(TaskStatus / ArtifactInfo / ProcessTaskResult) | +| `network/tasks/TaskClient.kt` | REST 客户端:`processPdf(fileId, processType)`、查询状态、产物列表、下载 | + +#### 导出页三按钮 UI + +Android 导出页新增三个独立操作按钮: + +1. **仅传输到电脑** — `uploadPdfToServer()`:纯上传,设置 `Uploaded(fileId, taskId=null)` +2. **上传并处理 (OCR PDF)** — `uploadAndProcess("ocrpdf")`:上传 + 创建 OCR 任务 +3. 
**上传并处理 (Markdown)** — `uploadAndProcess("markdown")`:上传 + 创建 Markdown 任务 + +`UploadState` 状态模型: +- `Idle` — 未操作 +- `Uploading(progress)` — 上传中 +- `Uploaded(fileId, taskId?)` — 上传成功,taskId 为 null 表示纯传输 +- `Error(message)` — 上传失败 + +--- + +### PC 管理面板 ✅ + +浏览器访问 `/dashboard`,包含: + +#### 统计卡片 +- 已上传文件数 +- 处理任务数(排队中/处理中/已完成) + +#### 文件列表 +- 显示所有已上传的原始 PDF +- 列:文件名、文件 ID、大小、时间、操作 +- 操作列提供 ⬇ 下载按钮(`/files/{fileId}/download`) + +#### 任务列表 +- 显示所有处理任务及其状态 +- 列:文件名、任务 ID、状态(带 badge)、进度条、处理类型、时间、操作 +- 已完成任务的操作列提供 ⬇ 下载产物按钮 + +#### 导航 +- 顶部导航栏可在图传预览页(/)和管理面板(/dashboard)间切换 +- 自动刷新(每 2 秒) + +--- + +--- + +### MinerU 真实接入 ✅ + +替换了之前的模拟 markdown 处理,使用真实 MinerU pipeline 后端: + +- 使用 `aio_do_parse()` 异步接口,不阻塞 FastAPI 事件循环 +- Pipeline 后端配置:`backend="pipeline"`, `parse_method="auto"` +- 环境:Conda 环境 `MinerU`(Python 3.10.20, PyTorch 2.6.0+cu124, CUDA 12.4) +- GPU: NVIDIA RTX 4060 Laptop (8 GB VRAM) +- 模型缓存路径:`C:/Users/32892/.cache/huggingface/hub/` +- `HF_HUB_OFFLINE=1` 强制使用本地缓存,绕过国内网络不可达 huggingface.co 的问题 + +**MinerU markdown 输出**: +- `{name}.md` — markdown 产物 +- `images/` — 提取的图片资源 +- `{name}_result.zip` — `.md + images/` 的完整打包(新增,便于手机端下载后直接使用) + +**MinerU ocrpdf 输出**: +- `{name}_layout.pdf` — 带布局框的 PDF(当前模式) +- 注:此处不是真正的 OCRmyPDF 双层 PDF,详见 NEXT_STEPS + +### Markdown ZIP 打包 ✅ + +PC 服务器 markdown 处理完成后,自动检查 `images/` 目录: +- 有图片 → 打包 `{name}.md` + `images/` 为 `{name}_result.zip` +- 无图片 → 仅保留 `.md` 产物 +- 两种产物(`.md` 和 `.zip`)均注册为独立 artifact,客户端可按需下载 +- `download_artifact` 支持 `application/zip` MIME 类型 + +### 手机端任务管理面板 ✅ + +在导出页底部新增 `TaskPanelSection` UI 组件: + +- **任务状态显示**:排队中 / 处理中(进度条) / 已完成 / 失败 +- **后台轮询**:2 秒间隔轮询 PC 任务状态,自动更新 UI,完成后自动停止 +- **下载到指定目录**:用户点击"选择目录" → SAF 文件夹选择器 → 点击"下载" → 保存到指定目录 +- **产物优选**:markdown 任务默认下载 ZIP;ocrpdf 任务默认下载 PDF +- **下载进度**:实时显示下载进度条 +- **已下载状态**:显示"已下载 — 打开"按钮,可打开文件 + +涉及文件: +- `ExportUiState.kt`:新增 `RemoteTask`、`TaskPanelState`、`DownloadState` +- `ExportViewModel.kt`:新增 `_taskPanelState`、轮询逻辑、`downloadResult()` +- `ExportScreen.kt`:新增 
`TaskPanelSection`、`TaskRow` UI 组件 +- `ExportActions`:新增 `downloadResult`、`resetDownloadState` 回调 + +### Bug 修复 + +| 问题 | 原因 | 修复 | +|------|------|------| +| MinerU 无法处理(SSL 错误) | `huggingface_hub` 启动时在线校验 revision | `main.py` 顶部设置 `HF_HUB_OFFLINE=1` | +| `main.py` 重复 `@Composable` 编译错误 | 编辑失误 | 移除重复注解 | +| `ButtonDefaults.TextButtonContentPadding.copy()` 不存在 | Material3 API 差异 | 改用 `PaddingValues()` 直接构造 | +| `DownloadState.Error` 不含 taskId | 无法区分哪个任务的错误 | 添加 `taskId` 参数 | +| WebSocket.send(ByteArray) 编译错误 | OkHttp WebSocket.send 需要 ByteString | 使用 `toByteString()` 扩展 | +| 网络权限未申请 | 旧 `tools:node="remove"` 删除声明 | 移除冲突行 | +| 明文通信被禁止 | `` 不支持 CIDR | 改用 `` | +| 帧未显示在浏览器 | 服务器未广播帧到浏览器客户端 | 添加 broadcast 循环 | +| 端口输入框"删除不干净" | `toIntOrNull()` 返回 null 后未更新 | 用 `remember` + `LaunchedEffect` | +| 下载的 PDF 为空白页 | `_create_minimal_pdf()` 缺少内容流 | 改为复制原始上传文件 | +| 上传进度卡在 0% | `upload_pdf` 未启动 `simulate_processing` | 添加 `asyncio.create_task`(后因分离重构移除) | +| Preview 函数编译错误 | 缺少 `onUploadAndProcess` 参数 | 添加 `onUploadAndProcess = {}` | + +--- + +## 架构总结 + +### 完整数据流 + +``` +相机预览 → liveAnalysis() + ├── → ML 分析(不变) → 文档页面 + │ + ├── → Streaming(图传开启时) + │ FrameCompressor → FrameDropController → OkHttpStreamClient → PC WS /stream → Browser + │ + └── → 拍照 → 处理 → PDF 生成 + ExportViewModel + ├── uploadPdfToServer() + │ → PdfUploadClient.uploadPdf() → PC POST /upload/pdf + │ → 返回 fileId → Uploaded(fileId, taskId=null) + │ + └── uploadAndProcess(processType) + → PdfUploadClient.uploadPdf() → PC POST /upload/pdf → fileId + → TaskClient.processPdf(fileId, processType) → PC POST /tasks/process → taskId + → Uploaded(fileId, taskId) +``` + +### PC 服务端架构 + +``` +files_db (dict): fileId → {fileId, fileName, sizeBytes, uploadPath, createdAt} +tasks_db (dict): taskId → {taskId, fileId, status, progress, processType, ...} +artifacts_db (dict): taskId → [{artifactId, fileName, ...}] +artifacts_map (dict): artifactId → {artifactId, fileName, filePath, ...} +``` + +### AppContainer 新增注入 +``` +- 
networkInfoProvider +- okHttpClient +- streamClient: StreamClient +- pdfUploadClient: PdfUploadClient +- taskClient: TaskClient +``` + +--- + +## PC 端端点总览 + +| 端点 | 方法 | 功能 | +|------|------|------| +| `/health` | GET | 健康检查 | +| `/` | GET | 图传预览页面 | +| `/stream` | WS | 接收 JPEG 帧 | +| `/dashboard` | GET | 管理面板页面 | +| `/api/dashboard` | GET | 管理面板 JSON 数据 | +| `/upload/pdf` | POST | 上传 PDF(纯上传,201) | +| `/tasks/process` | POST | 创建处理任务(202) | +| `/tasks/{taskId}` | GET | 查询任务状态 | +| `/tasks/{taskId}/artifacts` | GET | 查询任务产物列表 | +| `/artifacts/{artifactId}/download` | GET | 下载处理产物 | +| `/files/{fileId}/download` | GET | 下载已上传的原始文件 | + +--- + +## 文件清单 + +### 新增文件(Android 网络层) +1. `network/ServerEndpoint.kt` +2. `network/NetworkInfoProvider.kt` +3. `network/stream/StreamState.kt` +4. `network/stream/StreamQualityPreset.kt` +5. `network/stream/FrameCompressor.kt` +6. `network/stream/FrameDropController.kt` +7. `network/stream/OkHttpStreamClient.kt` +8. `network/upload/PdfUploadClient.kt` +9. `network/tasks/TaskModels.kt` +10. `network/tasks/TaskClient.kt` +11. `res/xml/network_security_config.xml` +12. `network/discovery/DiscoveredHost.kt`(占位,待 P0 实现) +13. `network/discovery/DiscoveryState.kt`(占位,待 P0 实现) +14. `network/discovery/LanServiceDiscovery.kt`(占位,待 P0 实现) + +### 新增文件(PC) +15. 
`pc-server/main.py` + +### 修改文件 + +| 文件 | 修改内容 | +|------|---------| +| `gradle/libs.versions.toml` | 添加 OkHttp 4.12.0 | +| `app/build.gradle.kts` | 添加 OkHttp 依赖 | +| `AndroidManifest.xml` | 添加网络权限、网络安全配置 | +| `FairScanApp.kt` | 添加 okHttpClient、streamClient、pdfUploadClient、taskClient | +| `CameraViewModel.kt` | 添加图传字段和方法、帧率控制 | +| `CameraScreen.kt` | 添加 StreamToggleButton | +| `SettingsRepository.kt` | 添加 StreamFrameRate、ServerHost、ServerPort 等 | +| `SettingsViewModel.kt` | 添加 streamFrameRate、serverHost 等字段 | +| `SettingsScreen.kt` | 添加帧率选择、网络配置 UI | +| `MainActivity.kt` | 添加上传回调、taskPanelState 收集、downloadResult 回调 | +| `ExportViewModel.kt` | 添加 uploadPdfToServer()、uploadAndProcess()、downloadResult()、startPolling() | +| `ExportUiState.kt` | 添加 UploadState、RemoteTask、TaskPanelState、DownloadState | +| `ExportScreen.kt` | 添加上传按钮、TaskPanelSection、TaskRow UI 组件 | +| `pc-server/main.py` | 添加 MinerU 真实接入、ZIP 打包、HF_HUB_OFFLINE | + +--- + +## 待实现 + +| 项目 | 状态 | +|------|------| +| **OCRmyPDF 真实接入** | **📌 下一步**(当前 ocrpdf 用 MinerU 生成 layout PDF,非真正双层可搜索 PDF) | +| NSD 局域网自动发现 | 📌 占位(接口已定义) | +| 设置页"扫描主机"/"测试连接"按钮功能 | 📌 待实现 | +| 图传延迟/帧率实时显示 | 🔜 可优化 | + +--- + +**修改人**:Claude Code +**最后更新**:2026-06-04 +**修改类型**:Feature - Streaming + Upload/Process Pipeline + Dashboard + Real MinerU + Task Panel + ZIP diff --git a/requirements/NEXT_STEPS.md b/requirements/NEXT_STEPS.md new file mode 100644 index 0000000..fd221ca --- /dev/null +++ b/requirements/NEXT_STEPS.md @@ -0,0 +1,145 @@ +# 下一步实现计划 + +## 现状总结 + +✅ **P1 实时图传**:已完成 +✅ **P2/P3 上传与任务处理**:已完成 +✅ **MinerU 真实接入**:已完成(markdown 处理 + ZIP 打包) +✅ **任务管理面板**:已完成(手机端轮询 + 下载到指定目录) + +### MinerU markdown 已实现 +- 使用 `aio_do_parse()` 异步接口,pipeline 后端 +- `HF_HUB_OFFLINE=1` 使用本地缓存模型 +- 输出 `.md` + `images/` + `{name}_result.zip` 三种 artifact + +### 任务管理面板已实现 +- 手机端 `TaskPanelSection`:排队中 / 处理中 / 已完成 / 失败 四种状态 +- 2 秒轮询 PC 任务状态,自动更新 UI +- SAF 目录选择 → 下载到指定目录 → 打开文件 + +### 当前 ocrpdf 的局限性 + +⚠️ 当前 `processType=ocrpdf` 使用 MinerU 的 
`f_draw_layout_bbox=True` 生成 layout PDF(在 PDF 上画布局框),**不是真正的 OCR 双层 PDF**。 + +真正的 OCRmyPDF 应该: +- 保留原始 PDF 的视觉外观 +- 在图像层上叠加透明文字层(text layer) +- 结果可通过 Ctrl+F 搜索文字 +- 文件可被屏幕阅读器朗读 + +--- + +## 下一步:OCRmyPDF 真实接入 🔥 + +### 目标 +用 `ocrmypdf` 库替换当前 MinerU 的 layout PDF 生成,产出真正的可搜索双层 PDF。 + +### 为什么需要 OCRmyPDF 而不是继续用 MinerU 做 ocrpdf + +| 特性 | MinerU layout PDF | OCRmyPDF | +|------|-------------------|----------| +| 可搜索文字 | ❌ 仅图片上的框 | ✅ 透明文字层 | +| 保留原始外观 | ❌ 重新渲染 | ✅ 原样保留 | +| 文件大小 | 较小 | 完整保留原 PDF | +| 用途 | 可视化版面分析 | 归档、检索、无障碍 | + +### 实现方案 + +`ocrmypdf` 是一个 Python 命令行工具/库,在 MinerU 的 conda 环境中安装: + +```bash +conda activate MinerU +pip install ocrmypdf +``` + +**统一环境说明**:MinerU 和 OCRmyPDF 共用一个 conda 环境 `MinerU`,PC 服务器始终在该环境下运行: + +```bash +conda activate MinerU +cd pc-server +python main.py +``` + +### 环境信息 + +| 项目 | 值 | +|------|-----| +| Conda 环境名 | `MinerU` | +| 环境路径 | `D:/ProgramData/miniconda3/envs/MinerU/` | +| Python | 3.10.20 | +| PyTorch | 2.6.0+cu124 | +| CUDA | 12.4 | +| GPU | RTX 4060 Laptop (8 GB VRAM) | +| MinerU | 3.0.9(已接入 markdown) | +| OCRmyPDF | 15.4.4(✅ 已安装,源码 `F:/datasets_rm/ocRmypdf`,v15.4.4 标签) | +| Tesseract | ❌ 待安装(OCRmyPDF 必需依赖) | +| 用途 | MinerU markdown 处理 + OCRmyPDF 双层 PDF 处理 | + +### 安装 Tesseract + +OCRmyPDF 依赖 Tesseract 做实际 OCR 文字识别。Windows 安装: + +```bash +# 方式1:conda(推荐,与 MinerU 同一环境) +conda activate MinerU +conda install -c conda-forge tesseract + +# 方式2:手动安装 +# 下载安装包:https://github.com/UB-Mannheim/tesseract/wiki +# 安装后确认: +tesseract --list-langs # 应包含 chi_sim, eng +``` + +安装中文语言包: +```bash +# conda 方式 +conda install -c conda-forge tesseract-lang + +# 或手动下载 chi_sim.traineddata 放到 tessdata 目录 +``` + +然后在 `pc-server/main.py` 的 `process_with_mineru` 中,`ocrpdf` 分支改为调用 OCRmyPDF: + +```python +import ocrmypdf + +# ocrpdf 分支 +ocrmypdf.ocr( + upload_path_src, # 输入 PDF + str(output_dir / f"{base_name}_ocr.pdf"), # 输出 PDF + language="chi_sim", # 中文简体 + output_type="pdf", + skip_text=True, # 跳过已有文字层 + deskew=True, # 纠偏 + clean=True, # 清理 +) +``` + 
+输出:真正的可搜索双层 PDF。 + +### 语言映射 + +| MinerU lang | OCRmyPDF language | +|-------------|-------------------| +| `ch` | `chi_sim` | +| `en` | `eng` | +| `japan` | `jpn` | +| `korean` | `kor` | + +### 待确认 + +- [x] `ocrmypdf` 已安装到 MinerU conda 环境(v15.4.4) +- [ ] Tesseract OCR 引擎已安装 +- [ ] Tesseract 语言包(`chi_sim`, `eng`)已安装 + +--- + +## P0:局域网发现与连接校验(待排期) + +### 目标 +让手机能够自动发现同一局域网中的 FairScan PC 服务。 + +### 已有占位文件 +- `network/discovery/LanServiceDiscovery.kt`(接口定义) +- `network/discovery/DiscoveryState.kt`(状态模型) +- `network/discovery/DiscoveredHost.kt`(主机模型) diff --git a/requirements/implementation-plan.md b/requirements/implementation-plan.md new file mode 100644 index 0000000..0935c26 --- /dev/null +++ b/requirements/implementation-plan.md @@ -0,0 +1,1385 @@ +# FairScan 功能细化规划(执行版) + +> 基于 `requirements/requirements.md` 的进一步落地拆解。 +> 本文件用于实现阶段的任务规划、模块拆分、接口约定、风险控制与验收标准。 +> +> 📌 **实现状态**: +> - P1(实时图传)✅ 已完成 +> - P2(PDF 上传)+ P3(统一处理任务)✅ 已完成 +> - 上传与处理已分离为独立接口,符合 pc-api-spec.md 规范 +> - 导出页提供三个按钮:仅传输 / 传输+OCR PDF / 传输+Markdown +> - PC 管理面板 ✅ 已完成(/dashboard) +> - MinerU 真实接入 ✅ 已完成(markdown 处理 + ZIP 打包 + 任务管理面板) +> - OCRmyPDF 🔥 **下一步** +> - P0(局域网发现)📌 待排期 +> +> 重要说明: +> +> - 本文档**不是只写给当前对话中的执行者**。 +> - 本文档应被视为一份**可交给任意工程执行者或 AI 编码代理**的实施说明。 +> - 例如 Claude Code、其他 AI 编码工具、或人工开发者,都应能依据本文件理解目标、边界、优先级与接口契约。 +> - 因此本文档尽量避免“只对当前上下文成立”的描述,改为更稳定的模块边界、任务拆分与接口说明。 + +## 0. 核心结论 + +本项目后续新增能力,分成两条彼此解耦的主链路: + +1. **实时图传链路** + - 目标:低延迟、稳定预览 + - 输入:手机相机预览帧 + - 输出:PC 实时显示画面 + - 特点:允许丢帧,不追求完整性 + +2. **文档处理链路** + - 目标:正式文档处理与结果回到手机 + - 输入:手机端本地生成的 PDF + - 输出:Markdown 或 OCR 后的 PDF + - 特点:追求正确性与完整性,可异步处理 + +同时明确以下原则: + +- **不在 App 内主动切换 Wi‑Fi** +- **局域网相机功能的本质是实时图传,不是文档处理** +- **文档处理只基于手机本地正式生成的 PDF,不基于图传流** +- **PC 端后处理只需预留统一任务接口,MinerU 与 OCRmyPDF 复用同一套处理协议** +- **处理结果回到手机,推荐通过“手机轮询状态 + 手机主动下载产物”实现** + +--- + +## 1. 
当前项目基础能力梳理 + +结合现有代码,当前 Android 端已经具备以下基础: + +- 文档扫描采集与页面处理 + - 相机实时预览、分割、文档边缘检测:`app/src/main/java/org/fairscan/app/ui/screens/camera/CameraViewModel.kt:98` + - 拍照后生成处理页:`app/src/main/java/org/fairscan/app/ui/screens/camera/CameraViewModel.kt:138` + - 裁切/页面编辑/顺序调整入口:`app/src/main/java/org/fairscan/app/MainActivity.kt:190` +- PDF/JPEG 导出 + - 导出准备与结果管理:`app/src/main/java/org/fairscan/app/ui/screens/export/ExportViewModel.kt:66` + - PDF 生成入口:`app/src/main/java/org/fairscan/app/ui/screens/export/ExportViewModel.kt:76` + - PDF 写入封装:`app/src/main/java/org/fairscan/app/data/FileManager.kt:46` +- 设置页基础结构 + - 设置存储:`app/src/main/java/org/fairscan/app/ui/screens/settings/SettingsRepository.kt:31` + - 设置界面:`app/src/main/java/org/fairscan/app/ui/screens/settings/SettingsScreen.kt:57` +- 应用容器可扩展网络模块注入 + - 容器入口:`app/src/main/java/org/fairscan/app/FairScanApp.kt:46` + +这意味着后续新增能力不需要重做扫描器,而是围绕以下两条主链路扩展: + +1. **实时图传链路**:手机相机预览 -> PC 实时显示 +2. **文档处理链路**:手机扫描成 PDF -> PC 处理 -> 结果回到手机 + +--- + +建议的文档阅读顺序: + + 1. IMPLEMENTATION_COMPLETE.md - 了解已完成的工作 + 2. FIXES_SUMMARY.md - 理解技术决策和修复方案 + 3. NEXT_STEPS.md - 明确下一步的实现方向 + +每完成一个任务后,都需要同步更新上述文档。 + +## 2. 
需求重述与产品化定义 + +### 2.1 原有离线扫描功能 + +这部分以现有实现为基础,后续原则是: + +- 保持当前扫描主流程稳定 +- 不让实时图传影响正式扫描质量 +- 后续所有文档处理都复用现有扫描结果 + +建议统一定义“文档结果对象”: + +- 页面集合(已裁切、已旋转、已滤镜处理) +- 本地导出的 PDF 文件 +- 可选 JPEG 文件集合 +- 文档元信息(文件名、页数、时间) + +### 2.2 局域网相机功能 + +这里明确产品定位: + +- 该功能本质上是**局域网实时图传** +- 主要目标是:**低延迟、稳定、可预览** +- PC 端主要用途是: + - 实时观察手机画面 + - 辅助取景 + - 远端监看 +- **不把图传流直接作为 MinerU/OCRmyPDF 的输入** +- **不从图传流直接生成正式 PDF** + +也就是说: + +- 图传负责“看” +- 扫描链路负责“生成正式文档” + +### 2.3 离线扫描 PDF 发送到 PC 主机 + +主链路定义如下: + +- 手机端完成扫描与页面处理 +- 手机端本地生成 PDF +- 手机端把 PDF 发送到 PC 主机 +- PC 主机接收 PDF 后再执行后处理任务 + +这条链路的好处是: + +- 延续现有扫描架构 +- 手机端产物清晰、统一 +- PC 端只负责重型处理任务 +- 图传模块与文档处理模块不会互相拖累 + +### 2.4 统一 PC 后处理能力 + +这里明确一个关键约束: + +- **PC 端后处理只需要预留统一接口,不需要为 MinerU 与 OCRmyPDF 设计两套完全不同的协议。** + +建议抽象为一套统一任务模型: + +- 输入:手机上传的 PDF +- 处理类型:`markdown` 或 `ocrpdf` +- 输出: + - `markdown` -> 返回 `.md` 及相关资源 + - `ocrpdf` -> 返回处理后的 `.pdf` + +也就是说: + +- **接口统一** +- **任务状态统一** +- **产物查询统一** +- **差异只体现在处理类型和产物 MIME 类型** + +### 2.5 结果回到手机 + +从产品描述上可以说: + +- PC 处理完毕后,把结果回传到手机 + +但从工程实现角度,推荐这样定义: + +- 手机端上传后拿到 `taskId` +- 手机端轮询任务状态 +- 任务完成后,手机端拉取产物列表 +- 手机端主动下载产物到本地 + +这样做比“PC 主动推送到手机”更适合 Android: + +- 不需要手机长期监听端口 +- 更适合前台/后台切换 +- 更容易失败重试 +- 也更适合未来交由不同执行者实现 + +--- + +## 3. 建议的总体架构 + +建议按“手机端 + PC 端服务”的双端架构设计。 + +### 3.1 两条主链路 + +#### A. 实时图传链路 + +```text +手机相机预览帧 +-> 抽帧/压缩 +-> WebSocket 发送 +-> PC 实时预览 +``` + +特点: + +- 目标是低延迟 +- 允许丢帧 +- 不追求逐帧可靠到达 +- 不参与正式 PDF 生成 + +#### B. 
文档处理链路 + +```text +手机扫描采集 +-> 手机本地页面处理 +-> 手机本地生成 PDF +-> HTTP 上传到 PC +-> 创建统一处理任务 +-> PC 执行对应处理器 +-> 手机查询任务状态 +-> 手机下载处理结果 +``` + +特点: + +- 目标是正确性与完整性 +- 使用正式文档产物作为输入 +- 允许异步处理 +- 与图传链路解耦 + +### 3.2 手机端职责(当前仓库) + +- 继续负责扫描采集、裁切、页面编辑、导出 PDF +- 提供局域网发现与主机配置能力 +- 提供实时图传能力 +- 上传本地 PDF 到 PC +- 发起统一处理任务 +- 轮询任务状态并下载结果 +- 在 UI 中展示连接状态、任务状态、结果入口 + +### 3.3 PC 端职责(建议新建单独项目) + +- 广播局域网服务信息 +- 接收图传帧并实时显示 +- 接收手机上传的 PDF 文件 +- 暴露统一处理任务接口 +- 根据处理类型调用 MinerU 或 OCRmyPDF +- 提供任务查询与产物下载接口 + +### 3.4 推荐协议选型 + +为了降低复杂度,建议如下: + +- 局域网发现:**mDNS / NSD** +- 实时图传:**WebSocket 二进制帧** +- 文件上传:**HTTP multipart/form-data** +- 任务创建:**HTTP POST** +- 任务状态查询:**HTTP 轮询** +- 结果下载:**HTTP GET** + +第一版不建议直接做: + +- App 内切换 Wi‑Fi +- 自定义复杂 UDP 视频协议 +- PC 主动推送文件到手机 +- 多主机自动同步 + +### 3.5 关于实时性的技术取舍 + +由于实时性优先,建议这样分层: + +#### V1 方案 + +- 低频抽帧 +- JPEG 压缩 +- WebSocket 发送 +- 明确丢帧策略 + +优点: + +- 实现快 +- 调试简单 +- 适合先验证局域网低延迟图传是否满足需求 + +#### V2 升级预案(仅在 V1 不满足时再评估) + +- 使用 MediaCodec 编码 H.264 +- 再评估 WebRTC / RTP / 更底层的视频链路 + +不建议首版直接上 V2,因为会显著提高开发复杂度。 + +--- + +## 4. 
模块拆分规划 + +## 4.1 模块 A:局域网连接与主机发现 + +### 目标 + +让手机端可以: + +- 自动发现同一局域网中的 FairScan PC 服务 +- 手动填写主机地址作为兜底 +- 保存当前选中的 PC 主机配置 + +### 手机端新增能力 + +- 新增网络设置项 + - 主机 IP / 域名 + - 端口 + - 协议(第一期建议仅 http) +- 显示当前手机 IP +- 显示当前发现到的主机列表 +- 支持一键选择已发现主机 +- 测试连接按钮 +- 可选:打开系统 Wi‑Fi 设置入口 + +### 局域网发现机制 + +推荐采用: + +- **mDNS/NSD 自动发现 + 手动输入兜底** + +#### 推荐服务标识 + +- mDNS service type:`_fairscan._tcp` +- service instance name:`FairScan-PC-{deviceName}` + +#### 推荐 TXT Record 字段 + +- `name`:设备显示名 +- `features`:`upload,stream,process,download` +- `version`:PC 服务版本 +- `apiVersion`:接口版本 + +说明: + +- 这里建议把后处理能力统一成 `process`,而不是在发现层暴露过多具体工具细节。 +- 是否支持 `markdown` / `ocrpdf`,可以通过健康检查响应或任务能力字段进一步细分。 + +#### Android 端发现状态模型 + +```text +DiscoveryState +- Idle +- Discovering +- Success(list) +- Empty +- Error(message) +``` + +#### 发现结果建议结构 + +```text +DiscoveredHost +- serviceName +- displayName +- host +- port +- features[] +- version +- lastSeenAt +- isReachable +``` + +### 建议新增数据项 + +在 `SettingsRepository` 中新增: + +- `serverHost` +- `serverPort` +- `serverDisplayName` +- `lastSelectedServiceId` +- `streamQuality` +- `postProcessMode` +- `autoDownloadProcessedResult` + +### 建议涉及文件 + +- `app/src/main/java/org/fairscan/app/ui/screens/settings/SettingsRepository.kt` +- `app/src/main/java/org/fairscan/app/ui/screens/settings/SettingsScreen.kt` +- `app/src/main/java/org/fairscan/app/ui/screens/settings/SettingsViewModel.kt` +- 建议新增: + - `app/src/main/java/org/fairscan/app/network/NetworkInfoProvider.kt` + - `app/src/main/java/org/fairscan/app/network/ServerEndpoint.kt` + - `app/src/main/java/org/fairscan/app/network/discovery/LanServiceDiscovery.kt` + - `app/src/main/java/org/fairscan/app/network/discovery/NsdLanServiceDiscovery.kt` + - `app/src/main/java/org/fairscan/app/network/discovery/DiscoveryState.kt` + - `app/src/main/java/org/fairscan/app/network/discovery/DiscoveredHost.kt` + +### 验收标准 + +- 用户可以扫描到局域网中的 FairScan PC 服务 +- 用户可以从发现列表中选择目标主机 +- 自动发现失败时仍可手动输入地址 +- 选择主机后可通过 `GET /health` 校验连通性 + +--- + 
+## 4.2 模块 B:实时网络图传 + +### 目标 + +把手机作为一个**低延迟局域网实时图传设备**,让 PC 端可以实时看到画面。 + +### 功能定位 + +这一模块的核心不是文档处理,而是: + +- 实时预览 +- 低延迟 +- 稳定连接 +- 图传状态清晰 + +### 推荐实现路线 + +第一版建议: + +- 从 CameraX 预览/分析流中取图 +- 按固定频率抽帧 +- 压缩为 JPEG +- 通过 WebSocket 发送二进制帧到 PC +- PC 端显示实时画面 + +当前可接入入口: + +- `app/src/main/java/org/fairscan/app/ui/screens/camera/CameraViewModel.kt:98` + +### 关键原则 + +- **必须允许丢帧**,不能把帧积压在队列里 +- **必须以实时性优先**,而不是完整性优先 +- **图传失败不能影响正式扫描** +- **图传流不参与正式 PDF 生成** + +### 压缩档位建议 + +| 档位 | 最长边 | JPEG质量 | 目标FPS | 用途 | +|---|---:|---:|---:|---| +| Low | 640 | 45 | 8~12 | 低延迟预览 | +| Balanced | 960 | 60 | 6~10 | 默认 | +| High | 1280 | 75 | 5~8 | 高清预览 | + +### 手机端子任务 + +1. 设计图传状态模型 +2. 新增帧压缩器 +3. 新增图传客户端 +4. 增加丢帧策略 +5. 在相机界面增加图传开关和状态提示 +6. 增加断线重连策略 +7. 增加简单性能指标 + +### 建议涉及文件 + +已有接入点: + +- `app/src/main/java/org/fairscan/app/ui/screens/camera/CameraViewModel.kt:98` +- `app/src/main/java/org/fairscan/app/ui/screens/camera/CameraScreen.kt` +- `app/src/main/java/org/fairscan/app/ui/screens/camera/CameraPreview.kt` + +建议新增: + +- `app/src/main/java/org/fairscan/app/network/stream/StreamClient.kt` +- `app/src/main/java/org/fairscan/app/network/stream/FrameCompressor.kt` +- `app/src/main/java/org/fairscan/app/network/stream/StreamState.kt` +- `app/src/main/java/org/fairscan/app/network/stream/StreamQualityPreset.kt` +- `app/src/main/java/org/fairscan/app/network/stream/FrameDropController.kt` + +### 验收标准 + +- 手机连接 PC 后可持续发送实时预览帧 +- 在局域网环境下主观延迟明显低于文件传输式刷新 +- 不同压缩档位的延迟和清晰度有可见差异 +- 图传开启时,正式扫描/导出功能仍可正常使用 + +--- + +## 4.3 模块 C:手机本地生成 PDF 并上传到 PC + +### 目标 + +保持现有手机扫描方案不变,在手机端完成正式文档生成后,再把 PDF 上传给 PC。 + +### 推荐实现路线 + +- 手机端按现有流程完成扫描 +- 使用现有导出逻辑生成 PDF +- 调用上传接口把 PDF 发给 PC + +当前稳定入口: + +- `app/src/main/java/org/fairscan/app/ui/screens/export/ExportViewModel.kt:76` +- `app/src/main/java/org/fairscan/app/ui/screens/export/ExportViewModel.kt:84` +- `app/src/main/java/org/fairscan/app/data/FileManager.kt:46` + +### 设计原则 + +- 正式文档输入只认手机本地生成的 PDF +- 不使用实时图传流直接做正式文档处理 +- 上传是文档处理链路的前半段,不与实时图传混合 + +### 手机端子任务 + 
+1. 定义上传请求/响应模型 +2. 导出页增加“发送到 PC”按钮 +3. 支持发送后立即创建后处理任务,或只上传不处理 +4. 展示上传进度与结果 +5. 记录返回的文件标识或任务标识 + +### 建议涉及文件 + +- `app/src/main/java/org/fairscan/app/ui/screens/export/ExportViewModel.kt:66` +- `app/src/main/java/org/fairscan/app/MainActivity.kt:203` +- 建议新增: + - `app/src/main/java/org/fairscan/app/network/upload/PdfUploadClient.kt` + - `app/src/main/java/org/fairscan/app/network/upload/UploadModels.kt` + - `app/src/main/java/org/fairscan/app/ui/screens/export/NetworkExportState.kt` + +### 验收标准 + +- 用户可以把当前扫描结果导出成 PDF 并发送给 PC +- 上传进度可见 +- 成功后能拿到后续处理所需的文件标识或任务标识 + +--- + +## 4.4 模块 D:统一 PC 后处理接口 + +### 目标 + +为 PC 端预留**一套统一的后处理任务接口**,由处理类型决定具体使用 MinerU 还是 OCRmyPDF。 + +### 设计原则 + +- 统一任务创建接口 +- 统一任务状态接口 +- 统一产物列表接口 +- 统一产物下载接口 +- 差异只体现在: + - `processType` + - 输出文件类型 + +### 推荐任务类型 + +```text +processType +- markdown +- ocrpdf +``` + +### 输入输出约定 + +#### 输入 + +- 一个已上传或本次提交的 PDF +- 一个 `processType` + +#### 输出 + +- 当 `processType=markdown` + - 主要产物:`.md` + - 可选附加产物:图片资源目录、JSON、日志 +- 当 `processType=ocrpdf` + - 主要产物:处理后的 `.pdf` + - 可选附加产物:日志、识别报告 + +### 推荐接口契约 + +#### 1. 创建任务 + +- `POST /tasks/process` + +请求体建议: + +```json +{ + "fileId": "uploaded-file-id", + "processType": "markdown", + "options": {} +} +``` + +#### 2. 查询任务状态 + +- `GET /tasks/{taskId}` + +返回体建议: + +```json +{ + "taskId": "task-123", + "status": "running", + "processType": "markdown", + "progress": 50, + "message": "processing", + "artifacts": [] +} +``` + +#### 3. 查询产物列表 + +- `GET /tasks/{taskId}/artifacts` + +返回体建议: + +```json +[ + { + "artifactId": "artifact-1", + "fileName": "result.md", + "mimeType": "text/markdown", + "role": "primary" + } +] +``` + +#### 4. 
下载产物 + +- `GET /artifacts/{artifactId}/download` + +### 手机端实现含义 + +手机端不需要关心 PC 内部到底调用了 MinerU 还是 OCRmyPDF,只需要关心: + +- 要处理哪个文件 +- 处理类型是什么 +- 当前任务状态是什么 +- 可以下载哪些产物 + +### 这样设计的好处 + +- PC 端工具实现可替换 +- 后续新增其他处理器时接口不必大改 +- 文档可交给不同 AI 或开发者实现而不易跑偏 +- Android 端状态管理更统一 + +### 建议新增文件 + +- `app/src/main/java/org/fairscan/app/network/tasks/TaskClient.kt` +- `app/src/main/java/org/fairscan/app/network/tasks/TaskModels.kt` +- `app/src/main/java/org/fairscan/app/network/tasks/ArtifactDownloadClient.kt` +- `app/src/main/java/org/fairscan/app/ui/screens/export/ProcessingTaskState.kt` + +### 验收标准 + +- 手机端可通过统一接口发起 `markdown` 任务 +- 手机端可通过统一接口发起 `ocrpdf` 任务 +- 任务状态查询逻辑对两类任务保持一致 +- 产物下载逻辑对两类任务保持一致 + +--- + +## 4.5 模块 E:处理结果回到手机端 + +### 目标 + +让 PC 端的处理结果真正回到手机端,而不是停留在 PC 上。 + +### 推荐实现方式 + +产品上: + +- 结果回传手机 + +技术上: + +- 手机端主动下载 + +### 为什么不建议 PC 主动推送到手机 + +- Android 端长期开放监听端口不稳定 +- 前后台切换复杂 +- 手机网络权限与系统限制更多 +- 局域网内由手机主动拉取更简单可靠 + +### 手机端下载结果后的处理建议 + +- 下载到应用缓存目录 +- 再提供: + - 打开 + - 分享 + - 保存到用户指定目录 + - 作为新文档导入应用 + +### 验收标准 + +- 任务完成后手机端可获取产物列表 +- 手机端可把产物下载回本地 +- 下载失败时可重试 +- 用户可明确区分“任务失败”和“下载失败” + +--- + +## 5. 
推荐开发阶段划分 + +## 第一阶段:局域网发现与连接校验 + +目标:先把“发现谁、连接谁”做稳定。 + +### 范围 + +- 设置页新增 PC 主机配置 +- mDNS/NSD 发现机制 +- 手动输入兜底 +- `GET /health` 健康检查 + +### 阶段验收 + +- 手机端可自动发现同局域网中的 FairScan PC 服务 +- 发现失败时可手动输入地址 +- 健康检查可明确提示成功/失败 + +--- + +## 第二阶段:实时图传闭环 + +目标:优先完成低延迟实时图传。 + +### 范围 + +- 图传客户端 +- 相机页图传开关 +- 图传压缩档位 +- PC 端实时预览 +- 复用第一阶段的主机发现结果 + +### 阶段验收 + +- 手机端可以把实时预览帧发送到 PC +- PC 端能低延迟显示画面 +- 图传质量档位切换有效 +- 图传失败不影响正常扫描 + +--- + +## 第三阶段:手机本地 PDF 上传到 PC + +目标:打通文档处理链路的前半段。 + +### 范围 + +- 手机本地导出 PDF +- 上传到 PC +- 上传状态展示 + +### 阶段验收 + +- 当前扫描文档可生成 PDF 并上传至 PC +- 上传结果可见 +- 可关联后续处理任务 + +--- + +## 第四阶段:统一处理任务与结果下载 + +目标:打通文档处理链路的后半段。 + +### 范围 + +- 统一处理任务接口 +- `markdown` 处理类型 +- `ocrpdf` 处理类型 +- 手机查询任务状态 +- 手机下载结果文件 + +### 阶段验收 + +- 手机可用统一接口发起两种处理类型 +- PC 可根据类型调用不同处理器 +- 结果文件可从 PC 下载回手机 + +--- + +## 第五阶段:体验优化 + +### 范围 + +- 最近连接记录 +- 自动下载开关 +- 失败重试 +- 后台通知 +- 发现结果去重与缓存 +- 可选:UDP/网段探测作为发现兜底 + +--- + +## 5.1 局域网发现专项方案 + +### 目标 + +在不切换 Wi‑Fi 的前提下,让手机端自动发现同一局域网中运行中的 FairScan PC 服务。 + +### 主方案 + +- PC 端广播 `_fairscan._tcp` +- Android 端 NSD 搜索 `_fairscan._tcp` +- 用户确认选择目标主机 +- 手机端调用 `GET /health` 校验 + +### 发现流程 + +1. PC 启动 HTTP 服务 +2. PC 注册 mDNS 服务 +3. 手机点击“扫描局域网主机” +4. Android NSD 开始发现 +5. 解析 host、port、features +6. 展示设备列表 +7. 用户选择设备 +8. 手机执行 `GET /health` +9. 校验成功后保存为当前目标主机 + +### 备用方案触发条件 + +只有在以下情况长期存在时,才考虑第二通道: + +- 大量设备无法发现 mDNS +- 企业网络场景较多 +- Windows 防火墙导致广播不稳定 + +此时再评估: + +- 自定义 UDP 广播 +- 指定网段 HTTP 探测 + +默认不建议首版同时做两套发现机制。 + +--- + +## 6. 
Android 端建议修改点清单 + +## 6.1 权限与清单 + +当前 Manifest 尚未为新网络能力补齐联网权限: + +- `app/src/main/AndroidManifest.xml:1` + +后续大概率需要新增: + +- `android.permission.INTERNET` +- `android.permission.ACCESS_WIFI_STATE` +- 第一版不建议使用 `android.permission.CHANGE_WIFI_STATE` + +如果使用明文 HTTP 局域网服务,还要评估: + +- network security config +- 或仅在开发阶段允许局域网明文流量 + +## 6.2 设置页 + +当前设置页很适合作为局域网协作入口: + +- `app/src/main/java/org/fairscan/app/ui/screens/settings/SettingsScreen.kt:57` +- `app/src/main/java/org/fairscan/app/ui/screens/settings/SettingsRepository.kt:31` + +建议新增“局域网协作”分组: + +- PC 主机地址 +- 端口 +- 当前手机 IP +- 扫描局域网主机按钮 +- 已发现主机列表 +- 图传质量预设 +- 默认处理类型 +- 自动下载结果开关 +- 测试连接按钮 +- 打开系统 Wi‑Fi 设置快捷入口(可选) + +## 6.3 相机页 + +当前相机实时分析入口适合接入图传: + +- `app/src/main/java/org/fairscan/app/ui/screens/camera/CameraViewModel.kt:98` + +建议新增: + +- 图传开关 +- 图传状态文本 +- 当前目标主机提示 +- 压缩档位快速切换 +- 近似发送帧率或发送状态提示 + +## 6.4 导出页 + +当前导出能力已较完整: + +- `app/src/main/java/org/fairscan/app/ui/screens/export/ExportViewModel.kt:66` + +建议新增按钮: + +- 发送到 PC +- 发送并处理(Markdown) +- 发送并处理(OCR PDF) +- 下载处理结果(当任务完成后显示) + +## 6.5 任务与结果页面 + +建议后续增加一个轻量任务状态区域,至少包含: + +- 上传中 +- 等待处理 +- 处理中 +- 处理成功 +- 处理失败 +- 下载中 +- 下载成功 +- 下载失败 + +## 6.6 AppContainer 注入 + +当前容器适合新增网络模块注入: + +- `app/src/main/java/org/fairscan/app/FairScanApp.kt:46` + +建议后续加入: + +- `lanServiceDiscovery` +- `streamClient` +- `uploadClient` +- `taskClient` +- `artifactDownloadClient` +- `networkInfoProvider` + +--- + +## 7. 
PC 端服务建议最小方案 + +### 7.1 目标边界 + +PC 端在本阶段只需要提供: + +- 图传接收能力 +- 文件接收能力 +- **统一处理任务接口骨架** +- 任务状态返回能力 +- 产物下载能力 + +这里特别强调: + +- 对当前阶段来说,PC 端**只需预留好统一接口** +- MinerU 与 OCRmyPDF 的内部执行器可以后续再逐步接入 +- 但接口契约应先稳定下来,避免 Android 端后面反复改协议 + +### 7.2 推荐技术路线 + +如果 MinerU 与 OCRmyPDF 最终都运行在 Python 环境,最自然的是: + +- PC 端统一使用 **Python FastAPI** + +### 7.3 PC 端最小能力 + +- HTTP 服务 +- WebSocket 图传接收 +- mDNS 服务广播 +- 上传文件保存 +- 统一任务接口 +- 任务状态查询 +- 结果文件下载 + +### 7.4 最小接口建议 + +- `GET /health` + - 健康检查 +- `WS /stream` + - 实时图传接收 +- `POST /upload/pdf` + - 上传 PDF +- `POST /tasks/process` + - 发起统一处理任务 +- `GET /tasks/{id}` + - 查询任务状态 +- `GET /tasks/{id}/artifacts` + - 查询产物列表 +- `GET /artifacts/{artifactId}/download` + - 下载产物 + +### 7.5 统一处理任务接口示例 + +#### 创建任务请求示例 + +```json +{ + "fileId": "uploaded-file-id", + "processType": "ocrpdf", + "options": {} +} +``` + +#### 状态返回示例 + +```json +{ + "taskId": "task-123", + "status": "completed", + "processType": "ocrpdf", + "progress": 100, + "message": "done" +} +``` + +#### 产物列表示例 + +```json +[ + { + "artifactId": "artifact-1", + "fileName": "result.pdf", + "mimeType": "application/pdf", + "role": "primary" + } +] +``` + +### 7.6 目录规划建议 + +- `incoming/` 原始上传文件 +- `tasks/` 任务工作目录 +- `outputs/pdf/` OCR 输出 +- `outputs/markdown/` Markdown 输出 +- `logs/` 服务日志与任务日志 + +### 7.7 广播能力建议 + +mDNS TXT Record 中建议广播: + +- `stream=1` +- `upload=1` +- `process=1` +- `download=1` +- `apiVersion=1` + +说明:以上是逐项标志的写法;4.1 节推荐的 TXT 字段则是单个 `features` 字段(如 `features=upload,stream,process,download`)。两种写法表达的能力相同,实现时只选一种,并让 PC 端广播与 Android 端解析保持一致;推荐统一采用 `features` 字段。 + +如需更细粒度,也可通过 `GET /health` 返回: + +```json +{ + "name": "FairScan-PC-Office", + "status": "ok", + "features": ["stream", "upload", "process", "download"], + "processTypes": ["markdown", "ocrpdf"] +} +``` + +--- + +## 8. 
风险与难点 + +## 8.1 实时图传的延迟与稳定性 + +风险: + +- CameraX 分析 + 压缩 + 网络发送会叠加延迟 +- 发送队列积压会使画面越来越慢 + +应对: + +- 固定抽帧 +- 只保留最新帧 +- 上一帧未发完则直接丢弃当前帧 +- 后台线程压缩与发送 +- 优先保证低延迟而不是高保真 + +## 8.2 Android 网络与 Wi‑Fi 限制 + +风险: + +- 获取 Wi‑Fi 名称和网络信息在新系统上限制较多 +- App 内切换 Wi‑Fi 成本高且兼容性差 + +应对: + +- 不在 App 内切换 Wi‑Fi +- 只显示网络信息 +- 必要时提供“打开系统 Wi‑Fi 设置”入口 + +## 8.3 局域网发现兼容性 + +风险: + +- 某些路由器可能屏蔽 mDNS 多播 +- Windows 防火墙可能阻止服务广播 +- 无线与有线终端可能被隔离 + +应对: + +- 使用 `NSD + 手动输入 + /health 校验` 三段式兜底 +- 设置页提供简单排障提示 +- 后续再评估 UDP/网段探测 + +## 8.4 PC 后处理环境依赖 + +风险: + +- MinerU 与 OCRmyPDF 依赖较重 +- Python 环境与外部工具安装复杂 +- 内部执行器后续可能替换实现 + +应对: + +- Android 端只依赖统一处理接口,不依赖具体工具细节 +- PC 端先稳定接口契约,再逐步补充内部执行器 +- 通过 `processTypes` 或等价字段声明当前支持的处理类型 + +## 8.5 结果回到手机的可靠性 + +风险: + +- 任务成功但下载失败 +- 用户切到后台后任务状态丢失 +- 下载到本地后存储路径不清晰 + +应对: + +- 区分“任务失败”和“下载失败” +- 记录最近任务与产物信息 +- 下载后提供打开、分享、另存为入口 + +--- + +## 9. 建议的任务清单(可直接进入开发) + +## P0:局域网发现与基础连接 + +### Task P0-1:扩展设置仓库与设置页 + +- 增加 PC 主机地址和端口配置 +- 增加发现结果展示区域 +- 增加“扫描局域网主机”按钮 +- 增加图传质量配置 +- 增加默认处理类型配置 +- 增加自动下载结果开关 +- 显示手机当前 IP + +### Task P0-2:实现局域网发现基础能力 + +- 接入 Android NSD +- 定义 `DiscoveredHost` 与 `DiscoveryState` +- 扫描 `_fairscan._tcp` +- 发现后支持一键填充 host/port +- 失败时保留手动输入兜底 + +### Task P0-3:补充网络基础设施 + +- 增加网络权限 +- 增加基础 HTTP 客户端 +- 增加统一错误模型 +- 增加 `GET /health` 校验 + +### Task P0-4:实现 PC 最小服务骨架 + +- `GET /health` +- mDNS 注册 `_fairscan._tcp` +- 广播服务能力信息 + +## P1:实时图传 + +### Task P1-1:实现图传状态模型 + +- 未连接 +- 连接中 +- 已连接 +- 发送中 +- 出错 + +### Task P1-2:实现帧压缩与抽帧策略 + +- 低/中/高三档 +- 限帧率 +- 丢帧策略 +- 只保留最新帧 + +### Task P1-3:相机页接入图传控制 + +- 开关 +- 状态提示 +- 主机信息展示 +- 复用已发现或已保存的目标主机 + +### Task P1-4:实现 PC 端实时预览 + +- `WS /stream` +- 页面或桌面窗口显示 + +### Task P1-5:图传与发现联动 + +- 直接选择发现到的主机作为图传目标 +- 根据 `features` 判断主机是否支持 `stream` + +## P2:手机本地 PDF 上传 + +### Task P2-1:实现 PDF 上传客户端 + +- 导出页新增“发送到 PC”按钮 +- 生成 PDF 后发给 PC 服务 +- 展示上传进度和结果 + +### Task P2-2:联调上传闭环 + +- 手机扫描文档 +- 手机本地生成 PDF +- 上传到 PC 成功 +- 手动输入和自动发现两条路径都可工作 + +## P3:统一处理任务与结果下载 + +### Task P3-1:实现统一任务接口客户端 + +- 支持 `processType=markdown` +- 支持 `processType=ocrpdf` 
+- 统一查询任务状态 +- 统一查询产物列表 + +### Task P3-2:实现统一结果下载流程 + +- 下载 `.md` 结果 +- 下载 `.pdf` 结果 +- 基于 `mimeType` 决定本地处理方式 + +### Task P3-3:统一任务状态 UI + +- 等待中 +- 处理中 +- 成功 +- 失败 +- 下载中 +- 下载成功 +- 下载失败 + +### Task P3-4:PC 端统一接口占位实现 + +- `POST /tasks/process` +- `GET /tasks/{id}` +- `GET /tasks/{id}/artifacts` +- `GET /artifacts/{artifactId}/download` +- 先允许返回 mock 或占位结果,再逐步接入真实处理器 + +## P4:体验优化 + +### Task P4-1:发现结果去重与缓存 + +- 按 service name 或 host:port 去重 +- 记住最近选择设备 +- 显示最近发现时间 + +### Task P4-2:后台任务与通知 + +- 上传后台继续 +- 处理结果通知 +- 下载结果通知 + +### Task P4-3:高级兜底方案评估 + +- UDP 广播发现 +- 网段 HTTP 探测 +- 仅在主方案不稳定时启用 + +--- + +## 10. 建议的验收用例 + +### 用例 1:局域网发现 + +- PC 服务启动并广播 `_fairscan._tcp` +- 手机点击“扫描局域网主机” +- 手机看到可用设备列表 +- 用户选择设备后自动填充 host/port +- `GET /health` 验证通过 + +### 用例 2:实时图传 + +- 手机开启图传 +- PC 端能实时看到画面 +- 切换压缩档位后画面质量与延迟明显变化 +- 图传关闭后不影响正常扫描 + +### 用例 3:手机本地 PDF 上传 + +- 手机按原方案扫描 3 页文档 +- 手机本地生成 PDF +- 上传到 PC 成功 +- PC 成功保存原始 PDF + +### 用例 4:统一处理接口返回 Markdown 结果 + +- 手机使用 `processType=markdown` 发起任务 +- PC 返回任务状态 +- PC 提供 `.md` 结果下载 +- 手机成功下载 Markdown 结果 + +### 用例 5:统一处理接口返回 OCR PDF 结果 + +- 手机使用 `processType=ocrpdf` 发起任务 +- PC 返回任务状态 +- PC 提供 `.pdf` 结果下载 +- 手机成功下载 OCR 后 PDF + +### 用例 6:异常处理 + +- 主机地址错误 +- PC 服务未启动 +- mDNS 发现失败 +- 上传中断网 +- 当前 `processType` 不受支持 +- 下载结果失败 +- UI 都能给出明确失败信息 + +--- + +## 11. 建议的实现优先级 + +推荐开发顺序: + +1. 主机发现 + 健康检查 +2. 实时图传 +3. 手机本地 PDF 上传到 PC +4. 统一处理任务接口与结果下载 +5. 发现机制与任务体验优化 + +理由: + +- 你当前最关心的是实时图传,所以图传优先 +- 图传与文档处理解耦,优先完成图传不会阻碍后续 PDF 链路 +- PC 端后处理细节未来可替换,因此应先稳定统一接口契约 +- 文档处理链路应尽量让 Android 端只依赖抽象接口,不依赖具体工具内部实现 + +一句话概括: + +- **先把“实时看画面”做好,再把“正式处理文档并回到手机”做好;PC 内部处理器可以后补,但统一接口要先定下来。** + +--- + +## 12. 
对执行者的说明 + +本节是写给任何执行这份文档的人或 AI 的。 + +### 12.1 不要误解的点 + +- “局域网相机”不是文档处理功能,而是实时图传功能。 +- 文档处理输入来自手机本地正式生成的 PDF,而不是图传流。 +- PC 端现阶段最重要的是统一接口契约,不是先把 MinerU/OCRmyPDF 完整接好。 +- Android 端应依赖统一处理接口,不要直接写死两套后处理协议。 + +### 12.2 可以接受的实现策略 + +- PC 端统一处理接口可以先返回 mock 结果 +- 只要契约稳定,内部执行器可以后续逐步替换成真实 MinerU / OCRmyPDF +- Android 端先把: + - 上传 + - 创建任务 + - 查询状态 + - 下载产物 + 的全链路走通即可 + +### 12.3 不建议的实现策略 + +- 不要把图传流直接接到 MinerU / OCRmyPDF +- 不要在 App 内做 Wi‑Fi 切换 +- 不要同时为 `markdown` 与 `ocrpdf` 设计两套完全独立的客户端协议 +- 不要让 Android 端过早耦合 PC 内部处理器实现细节 + +### 12.4 如果由 AI 编码代理执行 + +执行顺序建议严格按以下优先级推进: + +1. 发现与连接 +2. 实时图传 +3. PDF 上传 +4. 统一处理接口 +5. 结果下载 +6. 再考虑真实接入 MinerU/OCRmyPDF + +如果 AI 只能做一部分任务,优先确保: + +- 接口稳定 +- 状态模型稳定 +- UI 路径清晰 +- 占位实现可联调 + +--- + +## 13. 与原始需求的映射关系 + +- “文件原有的离线扫描功能” + - 对应当前现有扫描/导出链路 +- “手机网络图传功能” + - 对应模块 B,且定位为低延迟实时图传 +- “局域网内压缩广播的实时网络摄像头” + - 对应模块 B,第一版建议做点对点实时图传 +- “压缩力度可选” + - 对应图传质量预设 +- “离线扫描 PDF 通过 Wi‑Fi 协议发送给 PC” + - 对应模块 C +- “显示自己的 IP 和端口” + - 对应模块 A +- “MinerU 转成 Markdown” + - 对应统一处理接口中的 `processType=markdown` +- “OCRmyPDF 转成双层 PDF” + - 对应统一处理接口中的 `processType=ocrpdf` +- “PC 处理完毕后再传回手机” + - 对应模块 E,推荐技术实现为手机主动下载结果 diff --git a/requirements/mineru-integration.md b/requirements/mineru-integration.md new file mode 100644 index 0000000..0bf7adc --- /dev/null +++ b/requirements/mineru-integration.md @@ -0,0 +1,392 @@ +# MinerU 接入 FairScan PC Server 对接文档 + +> 本文档记录 MinerU 在本机的环境信息、API 用法,以及如何将其接入 FairScan PC 服务器, +> 替换当前的模拟处理逻辑。 + +--- + +## 1. 
本机环境信息 + +> **统一环境**:MinerU 和 OCRmyPDF 共用一个 conda 环境 `MinerU`, +> PC 服务器始终通过 `conda activate MinerU` 启动。 + +| 项目 | 值 | +|------|-----| +| MinerU 源码路径 | `F:/datasets_rm/MinerU/` | +| **已安装版本** | **3.0.9** | +| **最新版本** | **3.2.2**(446 commits 差距) | +| Conda 环境 | `D:/ProgramData/miniconda3/envs/MinerU/` | +| Python | 3.10.20 | +| PyTorch | 2.6.0+cu124 | +| CUDA | 12.4 | +| GPU | NVIDIA GeForce RTX 4060 Laptop GPU (8 GB VRAM) | +| Transformers | 4.57.6 | +| onnxruntime | 1.23.2 | +| Pipeline 模型 | ✅ 已下载(HF cache: `C:/Users/32892/.cache/huggingface/hub/models--opendatalab--PDF-Extract-Kit-1.0`) | +| VLM 模型 | ✅ 已下载(HF cache: `C:/Users/32892/.cache/huggingface/hub/models--opendatalab--MinerU2.5-2509-1.2B`) | +| HF Hub 离线模式 | ✅ `HF_HUB_OFFLINE=1`(main.py 启动时设置) | +| OCRmyPDF | ✅ v15.4.4 已安装(源码 `F:/datasets_rm/ocRmypdf`,同一 conda 环境) | +| Tesseract | ❌ 待安装(OCRmyPDF 必需依赖) | + +--- + +## 2. 前置准备:升级 MinerU(强烈建议) + +当前安装的 3.0.9 与最新 3.2.2 差距较大(446 commits),主要改进包括: + +- **`aio_do_parse()` 异步接口** — 可直接 await 调用,不阻塞 FastAPI 事件循环 +- **并发锁优化** — Layout/MFR/OCR 使用独立推理锁,减少 GPU 争用 +- **PDF 渲染修复** — 大量 PDFium 资源泄漏和崩溃修复 +- **图像分析** — 新增 `image_analysis` 参数 +- **Client-side 输出生成** — 新增 `client_side_output_generation` 选项 + +### 2.1 拉取最新代码 + +```bash +cd F:/datasets_rm/MinerU +git checkout main +git pull origin main +git checkout mineru-3.2.2-released +``` + +### 2.2 更新安装 + +```bash +conda activate MinerU +pip install -e . +``` + +### 2.3 验证 + +```bash +# 检查版本 +python -c "from mineru.version import __version__; print(__version__)" # 应为 3.2.2 + +# 验证模型可用 +python -c " +from mineru.utils.models_download_utils import auto_download_and_get_model_root_path +print('Pipeline:', auto_download_and_get_model_root_path('models/README.md', 'pipeline')) +print('VLM:', auto_download_and_get_model_root_path('/', 'vlm')) +" +``` + +> **注意**:如果之后需要用 `model_source=local` 指定自定义模型路径,才需要创建 `~/.mineru.json` 配置文件。默认的 HuggingFace 缓存模式不需要。 + +--- + +## 3. 
MinerU 编程接口 + +### 3.1 核心函数:`do_parse` + +```python +from mineru.cli.common import do_parse, read_fn +from mineru.utils.enum_class import MakeMode +from pathlib import Path + +def do_parse( + output_dir: str, # 输出目录路径 + pdf_file_names: list[str], # PDF 文件名列表(不含扩展名) + pdf_bytes_list: list[bytes], # PDF 文件字节列表 + p_lang_list: list[str], # 语言列表("ch", "en", "japan" 等) + backend: str = "pipeline", # "pipeline" | "vlm-auto-engine" | "hybrid-auto-engine" + parse_method: str = "auto", # "auto" | "txt" | "ocr" + formula_enable: bool = True, + table_enable: bool = True, + server_url: str | None = None, # 远程服务器 URL(仅 http-client 后端) + f_dump_md: bool = True, # 输出 .md 文件 + f_dump_middle_json: bool = True, # 输出 _middle.json + f_dump_model_output: bool = True, # 输出 _model.json + f_dump_orig_pdf: bool = True, # 输出原始 PDF 副本 + f_dump_content_list: bool = True, # 输出 _content_list.json + f_draw_layout_bbox: bool = True, # 输出带布局框的 PDF + f_draw_span_bbox: bool = True, # 输出带 span 框的 PDF + f_make_md_mode: MakeMode = MakeMode.MM_MD, # Markdown 模式 + start_page_id: int = 0, + end_page_id: int | None = None, # None = 所有页 + **kwargs, +) +``` + +### 3.2 `read_fn` 辅助函数 + +```python +from mineru.cli.common import read_fn + +# 读取 PDF 文件为 bytes +pdf_bytes = read_fn("F:/path/to/doc.pdf") + +# 也支持图片文件(自动转为 PDF bytes) +png_bytes = read_fn("scan.png") +``` + +### 3.3 输出目录结构 + +Pipeline 后端(`backend="pipeline"`)输出: + +``` +{output_dir}/ + {pdf_name}/ + auto/ # parse_method="auto" + {pdf_name}.md # ★ Markdown 输出(主要产物) + {pdf_name}_middle.json # 中间解析结果 + {pdf_name}_model.json # 模型原始输出 + {pdf_name}_content_list.json + {pdf_name}_origin.pdf # 原始 PDF 副本 + {pdf_name}_layout.pdf # 布局可视化 + {pdf_name}_span.pdf # Span 可视化 + images/ # 提取的图片 +``` + +### 3.4 语言代码 + +| 代码 | 语言 | +|------|------| +| `ch` | 简体中文 | +| `ch_server` | 中文服务器版(较快) | +| `ch_lite` | 中文轻量版 | +| `en` | 英语 | +| `japan` | 日语 | +| `korean` | 韩语 | +| `chinese_cht` | 繁体中文 | + +--- + +## 4. 
接入方案 + +### 方案 A:直接异步 API 调用(强烈推荐,需 v3.2.2) + +升级到 v3.2.2 后,可以直接使用 `aio_do_parse()` — MinerU 原生异步接口,无需 `asyncio.to_thread()`。 + +**优点**: +- **原生 async**,直接 await,不阻塞 FastAPI 事件循环 +- 最简单,不需要进程间通信 +- 可直接获取输出文件路径 + +**前提**: +- FairScan PC 服务器在 MinerU conda 环境中运行 +- `F:/datasets_rm/MinerU` 已通过 `pip install -e .` 安装 + +**实现思路**: + +```python +# ---- pc-server/main.py 新增代码 ---- + +from pathlib import Path +from mineru.cli.common import aio_do_parse, read_fn + +async def real_mineru_processing(task_id: str): + """使用 MinerU 异步接口真实处理 PDF""" + task = tasks_db.get(task_id) + if task is None: + return + + file_name = task.get("fileName", "document.pdf") + base_name = Path(file_name).stem + upload_path = Path(task["uploadPath"]) + process_type = task.get("processType", "ocrpdf") + lang = task.get("options", {}).get("lang", "ch") + + task["status"] = "processing" + task["progress"] = 10 + task["message"] = "MinerU processing started..." + + output_dir = TASKS_DIR / task_id + output_dir.mkdir(exist_ok=True) + pdf_bytes = read_fn(upload_path) + + try: + if process_type == "markdown": + await aio_do_parse( + output_dir=str(output_dir), + pdf_file_names=[base_name], + pdf_bytes_list=[pdf_bytes], + p_lang_list=[lang], + backend="pipeline", + f_dump_md=True, + f_dump_middle_json=False, + f_dump_model_output=False, + f_dump_orig_pdf=False, + f_dump_content_list=False, + f_draw_layout_bbox=False, + f_draw_span_bbox=False, + ) + md_path = output_dir / base_name / "auto" / f"{base_name}.md" + if md_path.exists(): + art_id = str(uuid.uuid4()) + artifacts_db[task_id] = [{ + "artifactId": art_id, "fileName": f"{base_name}.md", + "fileSize": md_path.stat().st_size, "fileType": "md", + "filePath": str(md_path), + }] + artifacts_map[art_id] = artifacts_db[task_id][0] + task.update(status="completed", progress=100, + message="MinerU Markdown completed") + return + + elif process_type == "ocrpdf": + await aio_do_parse( + output_dir=str(output_dir), + pdf_file_names=[base_name], + 
pdf_bytes_list=[pdf_bytes], + p_lang_list=[lang], + backend="pipeline", + f_dump_md=False, + f_dump_middle_json=False, + f_dump_model_output=False, + f_dump_orig_pdf=False, + f_dump_content_list=False, + f_draw_layout_bbox=True, + f_draw_span_bbox=False, + ) + layout_pdf = output_dir / base_name / "auto" / f"{base_name}_layout.pdf" + if layout_pdf.exists(): + art_id = str(uuid.uuid4()) + artifacts_db[task_id] = [{ + "artifactId": art_id, "fileName": f"{base_name}_ocr.pdf", + "fileSize": layout_pdf.stat().st_size, "fileType": "pdf", + "filePath": str(layout_pdf), + }] + artifacts_map[art_id] = artifacts_db[task_id][0] + task.update(status="completed", progress=100, + message="OCR PDF completed") + return + + task["status"] = "failed" + task["message"] = "MinerU did not produce output" + + except Exception as e: + task["status"] = "failed" + task["message"] = f"MinerU error: {str(e)}" + logger.error(f"MinerU task {task_id} failed: {e}") +``` + +### 方案 B:子进程调用(备选) + +通过 `subprocess` 调用 `mineru` CLI: + +```python +import subprocess +import asyncio + +async def mineru_subprocess(task_id: str): + task = tasks_db[task_id] + upload_path = task["uploadPath"] + output_dir = TASKS_DIR / task_id + + cmd = [ + r"D:/ProgramData/miniconda3/envs/MinerU/python.exe", + "-m", "mineru.cli.client", + "-p", str(upload_path), + "-o", str(output_dir), + "-b", "pipeline", + "-l", "ch", + ] + + proc = await asyncio.create_subprocess_exec( + *cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + + # 轮询进度(可选:监控 stdout 中的进度信息) + while True: + line = await proc.stdout.readline() + if not line: + break + # 解析进度... + + returncode = await proc.wait() + if returncode == 0: + task["status"] = "completed" + else: + task["status"] = "failed" +``` + +**优点**:进程隔离,MinerU 崩溃不影响 FairScan 服务。 +**缺点**:进度监控困难,需要 IPC。 + +### 方案 C:MinerU FastAPI 服务 + +运行 MinerU 自带的 FastAPI 服务 `mineru-api` 作为微服务,FairScan 通过 HTTP 调用。 + +这一方案与 pc-api-spec.md 中对原子服务的建议一致,但实现复杂度更高。 + +--- + +## 5. 
与 pc-api-spec.md 的对应关系 + +根据接口规范,两种 `processType` 与 MinerU 的映射: + +| processType | MinerU 后端 | 输出文件 | 文件类型 | +|-------------|-----------|---------|---------| +| `markdown` | `backend="pipeline"` | `{name}.md` | `text/markdown` | +| `ocrpdf` | `backend="pipeline"` + `f_draw_layout_bbox=True` | `{name}_layout.pdf` | `application/pdf` | + +两种类型共用同一个 MinerU `do_parse` 调用,仅输出选项不同。 + +--- + +## 6. 接入步骤建议 + +### Step 1:升级 MinerU 到最新版 + +```bash +cd F:/datasets_rm/MinerU +git checkout main && git pull origin main +git checkout mineru-3.2.2-released +conda activate MinerU +pip install -e . +``` + +验证: +```bash +python -c "from mineru.cli.common import aio_do_parse; print('OK')" +``` + +### Step 2:切换 PC 服务器运行环境 + +```bash +conda activate MinerU +cd E:/race_save/FairScan_cyy/FairScan/pc-server +python main.py +``` + +### Step 3:替换 `simulate_processing` 为真实 MinerU 调用 + +在 `main.py` 中将 `simulate_processing` 替换为 `real_mineru_processing`(参考方案 A 的实现)。 + +### Step 4:端到端测试 + +1. 用小 PDF(1-2 页)先用 `parse_method="txt"` 测试(速度快) +2. 确认无误后切换为 `parse_method="auto"`(完整 OCR+公式+表格) +3. 测试处理完成后产物下载 + +--- + +## 7. 注意事项 + +| 项目 | 说明 | +|------|------| +| **GPU 显存** | RTX 4060 有 8GB VRAM。pipeline 后端约需 4-6GB,VLM 后端约需 6-8GB。建议用 pipeline 后端。 | +| **处理速度** | 普通 A4 PDF,pipeline 后端约 3-8 秒/页(取决于内容复杂度)。 | +| **语言** | 默认传 `ch`(简体中文)。FairScan 可扩展语言选择功能。 | +| **页数限制** | 可用 `start_page_id` / `end_page_id` 限制处理范围。 | +| **大文件** | PDF > 100 页建议分批处理。 | +| **超时** | 单次处理时间与页数成正比,不要设置过短的 HTTP 超时。 | +| **并发与线程安全** | `do_parse` 不是线程安全的。在 FastAPI 的 `async` 端点中使用同步的 `do_parse` 时,应放到线程池中调用(如 `asyncio.to_thread`),避免阻塞事件循环;若已升级并改用 `aio_do_parse`,则按方案 A 直接 `await` 即可。 | +| **错误处理** | `do_parse` 出错会抛出异常,需捕获并设置 `task["status"] = "failed"`。 | + +--- + +## 8. 
关键参考文件 + +| 文件 | 说明 | +|------|------| +| `F:/datasets_rm/MinerU/mineru/cli/common.py` | `do_parse()` 主入口 | +| `F:/datasets_rm/MinerU/mineru/cli/client.py` | CLI 参数定义 | +| `F:/datasets_rm/MinerU/mineru/cli/output_paths.py` | 输出路径解析 | +| `F:/datasets_rm/MinerU/mineru/utils/config_reader.py` | 配置读取 | +| `F:/datasets_rm/MinerU/mineru/utils/enum_class.py` | 枚举类型定义 | +| `F:/datasets_rm/MinerU/mineru.template.json` | 配置文件模板 | +| `E:/race_save/FairScan_cyy/FairScan/pc-server/main.py` | FairScan PC 服务器(需修改) | +| `E:/race_save/FairScan_cyy/FairScan/requirements/pc-api-spec.md` | API 接口规范 | diff --git a/requirements/pc-api-spec.md b/requirements/pc-api-spec.md new file mode 100644 index 0000000..389424e --- /dev/null +++ b/requirements/pc-api-spec.md @@ -0,0 +1,789 @@ +# FairScan PC 端统一接口规范(草案 v0.1) + +> 本文档定义 FairScan 手机端与 PC 端之间的最小稳定接口契约。 +> +> 适用对象: +> +> - 人工开发者 +> - Claude Code +> - 其他 AI 编码代理 +> +> 设计目标: +> +> - 让不同执行者都能按同一接口实现,不因上下文差异而跑偏 +> - 优先稳定协议与字段,而不是优先绑定具体内部实现 +> - 允许 PC 端先做“接口占位实现”,后续再逐步接入真实 MinerU / OCRmyPDF + +--- + +## 1. 设计范围 + +本文档覆盖以下能力: + +1. 局域网服务发现配套信息 +2. 健康检查接口 +3. 实时图传接口 +4. PDF 上传接口 +5. 统一处理任务接口 +6. 任务状态查询接口 +7. 处理产物查询接口 +8. 处理产物下载接口 + +本文档**不**约束以下内容: + +- PC 端内部具体使用什么库执行 MinerU +- PC 端内部具体使用什么方式调用 OCRmyPDF +- PC 端图传画面最终是显示在网页、桌面窗口还是其他 UI 中 +- Android 端 UI 的具体布局样式 + +也就是说: + +- **本文档约束的是“外部协议”** +- **不强制约束“内部实现”** + +--- + +## 2. 核心原则 + +### 2.1 图传与文档处理解耦 + +- 实时图传只负责低延迟画面预览 +- 正式文档处理只基于手机本地生成的 PDF +- 图传流不得直接作为 MinerU / OCRmyPDF 的正式输入 + +### 2.2 统一处理接口 + +PC 端后处理统一使用一套任务接口。 + +支持的处理类型: + +- `markdown` +- `ocrpdf` + +差异只体现在: + +- `processType` +- 返回产物的 MIME 类型 + +### 2.3 手机主动下载结果 + +“PC 处理后结果回到手机”在工程上定义为: + +- 手机查询任务状态 +- 手机获取产物列表 +- 手机主动下载产物 + +不要求 PC 主动回连手机进行推送。 + +### 2.4 允许占位实现 + +第一阶段允许 PC 端: + +- 返回 mock 任务 +- 返回 mock 产物 +- 先不真正接入 MinerU / OCRmyPDF + +只要对外接口契约稳定即可。 + +--- + +## 3. 
术语定义 + +### 3.1 File + +指手机上传到 PC 的原始 PDF 文件。 + +### 3.2 Task + +指 PC 端异步处理任务。 + +### 3.3 Artifact + +指任务完成后可下载的结果文件。 + +### 3.4 Primary Artifact + +指该处理类型最核心的主产物: + +- `markdown` -> `.md` +- `ocrpdf` -> `.pdf` + +### 3.5 Auxiliary Artifact + +指附加产物,例如: + +- 资源图片 +- 日志文件 +- JSON 中间结果 +- 识别报告 + +--- + +## 4. 协议总览 + +| 能力 | 方法 | 路径 | 说明 | +|---|---|---|---| +| 健康检查 | GET | `/health` | 检查服务可用性与能力 | +| 实时图传 | WS | `/stream` | 接收手机实时图像帧 | +| 上传 PDF | POST | `/upload/pdf` | 上传正式文档 PDF | +| 创建处理任务 | POST | `/tasks/process` | 发起统一处理任务 | +| 查询任务状态 | GET | `/tasks/{taskId}` | 查询任务执行状态 | +| 查询任务产物 | GET | `/tasks/{taskId}/artifacts` | 获取结果文件列表 | +| 下载产物 | GET | `/artifacts/{artifactId}/download` | 下载结果文件 | +| 下载原始文件 | GET | `/files/{fileId}/download` | 下载已上传的原始 PDF | + +默认基础地址示例: + +```text +http://{host}:{port} +``` + +例如: + +```text +http://192.168.1.10:8080 +``` + +--- + +## 5. 通用约定 + +### 5.1 编码与格式 + +- JSON 请求与响应统一使用 UTF-8 +- 除下载接口外,默认返回 `application/json` +- 图传 WebSocket 使用二进制消息承载 JPEG 帧 + +### 5.2 ID 规则 + +以下字段都视为**不透明字符串**: + +- `fileId` +- `taskId` +- `artifactId` + +客户端不得依赖这些 ID 的内部结构。 + +### 5.3 时间字段 + +如果服务返回时间字段,建议使用 RFC 3339 / ISO 8601,例如: + +```text +2026-06-04T12:34:56Z +``` + +时间字段不是第一阶段强制要求,但如果提供,应统一格式。 + +### 5.4 状态枚举 + +任务状态建议使用以下枚举: + +```text +queued +running +completed +failed +``` + +如后续需要,可扩展: + +```text +canceled +``` + +### 5.5 错误返回格式 + +推荐所有错误统一返回: + +```json +{ + "error": { + "code": "INVALID_REQUEST", + "message": "processType is required" + } +} +``` + +推荐错误码: + +- `INVALID_REQUEST` +- `UNSUPPORTED_PROCESS_TYPE` +- `FILE_NOT_FOUND` +- `TASK_NOT_FOUND` +- `ARTIFACT_NOT_FOUND` +- `PROCESSING_FAILED` +- `SERVICE_UNAVAILABLE` + +### 5.6 版本兼容原则 + +- 第一阶段不强制引入 `/api/v1` 路径前缀 +- 通过 `apiVersion` 字段表达协议版本 +- 后续如需重大变更,再评估路径版本化 + +--- + +## 6. 
局域网发现配套约定 + +### 6.1 mDNS 服务标识 + +- service type:`_fairscan._tcp` +- service instance name:`FairScan-PC-{deviceName}` + +### 6.2 推荐 TXT Record 字段 + +- `name`:设备显示名 +- `features`:`stream,upload,process,download` +- `apiVersion`:如 `1` +- `version`:PC 服务版本 + +### 6.3 关于 `process` 能力 + +这里建议广播能力使用: + +- `process` + +而不是直接广播多个内部工具名。 + +原因: + +- 发现层只需表达“能不能处理” +- 具体支持哪些 `processType`,可通过 `/health` 返回 +- 这样后续新增其他处理器时不需要修改发现层语义 + +--- + +## 7. 健康检查接口 + +## 7.1 GET `/health` + +### 作用 + +- 判断服务是否在线 +- 返回最小能力信息 +- 返回支持的处理类型 + +### 请求 + +无请求体。 + +### 成功响应示例 + +```json +{ + "name": "FairScan-PC-Office", + "status": "ok", + "version": "0.1.0", + "apiVersion": "1", + "features": ["stream", "upload", "process", "download"], + "processTypes": ["markdown", "ocrpdf"] +} +``` + +### 字段说明 + +| 字段 | 类型 | 必填 | 说明 | +|---|---|---|---| +| `name` | string | 是 | 设备显示名 | +| `status` | string | 是 | 固定为 `ok` | +| `version` | string | 否 | PC 服务版本 | +| `apiVersion` | string | 是 | 接口版本 | +| `features` | string[] | 是 | 服务能力 | +| `processTypes` | string[] | 否 | 当前支持的处理类型 | + +### 状态码 + +- `200 OK` + +--- + +## 8. 实时图传接口 + +## 8.1 WS `/stream` + +### 作用 + +接收手机端发送的实时画面帧。 + +### 连接方式 + +- 客户端发起 WebSocket 连接 +- 连接成功后开始发送二进制帧 +- 每条二进制消息代表**一张完整 JPEG 图像** + +### 帧格式 + +- 二进制消息 +- 内容:JPEG 文件完整字节流 +- 一条消息 = 一帧 + +### 服务端要求 + +- 服务端可只保留最新帧 +- 服务端不要求逐帧确认 +- 服务端允许丢弃旧帧以保证实时性 + +### 客户端要求 + +- 不得无限积压待发送帧 +- 若上一帧尚未发完,允许直接丢弃当前帧 +- 连接断开后由客户端自行决定是否重连 + +### 第一阶段最小可接受行为 + +- 服务端只需能接收 JPEG 帧并显示或缓存最新一帧 +- 不要求复杂多端会话管理 +- 不要求录像、回放、时间轴等高级功能 + +### 状态码 + +- WebSocket Upgrade 成功即视为可用 + +--- + +## 9. 
PDF 上传接口 + +## 9.1 POST `/upload/pdf` + +### 作用 + +上传手机端正式生成的 PDF 文件。 + +### 请求类型 + +```text +multipart/form-data +``` + +### 表单字段 + +| 字段 | 类型 | 必填 | 说明 | +|---|---|---|---| +| `file` | file | 是 | PDF 文件 | + +### 约束 + +- `file` 的 MIME 类型应为 `application/pdf` +- 服务端可根据需要限制上传大小 +- 若文件过大,建议返回 `413 Payload Too Large` + +### 成功响应示例 + +```json +{ + "fileId": "file-123", + "fileName": "Scan 2026-06-04 12.34.56.pdf", + "mimeType": "application/pdf", + "sizeBytes": 1048576 +} +``` + +### 字段说明 + +| 字段 | 类型 | 必填 | 说明 | +|---|---|---|---| +| `fileId` | string | 是 | 服务端文件标识 | +| `fileName` | string | 是 | 保存后的文件名 | +| `mimeType` | string | 是 | 固定为 `application/pdf` | +| `sizeBytes` | number | 是 | 文件字节大小 | + +### 状态码 + +- `201 Created` +- `400 Bad Request` +- `413 Payload Too Large` +- `500 Internal Server Error` + +--- + +## 10. 统一处理任务接口 + +## 10.1 POST `/tasks/process` + +### 作用 + +使用统一接口发起后处理任务。 + +### 请求示例 + +```json +{ + "fileId": "file-123", + "processType": "markdown", + "options": {} +} +``` + +### 请求字段说明 + +| 字段 | 类型 | 必填 | 说明 | +|---|---|---|---| +| `fileId` | string | 是 | 由上传接口返回的文件标识 | +| `processType` | string | 是 | `markdown` 或 `ocrpdf` | +| `options` | object | 否 | 预留扩展字段,首版可为空对象 | + +### 处理类型定义 + +| `processType` | 含义 | 预期主产物 | +|---|---|---| +| `markdown` | 执行 Markdown 转换 | `text/markdown` | +| `ocrpdf` | 执行 OCR PDF 处理 | `application/pdf` | + +### 成功响应示例 + +```json +{ + "taskId": "task-123", + "status": "queued", + "processType": "markdown", + "fileId": "file-123" +} +``` + +### 状态码 + +- `202 Accepted` +- `400 Bad Request` +- `404 Not Found`(`fileId` 不存在) +- `422 Unprocessable Entity`(`processType` 不支持时可选) +- `500 Internal Server Error` + +### 第一阶段占位实现要求 + +如果真实 MinerU / OCRmyPDF 尚未接入,允许这样实现: + +- 接口正常收请求 +- 正常返回 `taskId` +- 任务状态可直接从 `queued` -> `completed` +- 产物可先返回 mock 文件或占位文件 + +这样做的目标是: + +- 先稳定客户端协议 +- 先打通 Android 联调链路 +- 后续再逐步替换成真实处理器 + +--- + +## 11. 
查询任务状态接口 + +## 11.1 GET `/tasks/{taskId}` + +### 作用 + +返回单个任务的当前状态。 + +### 成功响应示例 + +```json +{ + "taskId": "task-123", + "status": "running", + "processType": "markdown", + "fileId": "file-123", + "progress": 50, + "message": "processing", + "artifactsAvailable": false +} +``` + +### 字段说明 + +| 字段 | 类型 | 必填 | 说明 | +|---|---|---|---| +| `taskId` | string | 是 | 任务标识 | +| `status` | string | 是 | `queued` / `running` / `completed` / `failed` | +| `processType` | string | 是 | 任务处理类型 | +| `fileId` | string | 是 | 输入文件标识 | +| `progress` | number | 否 | 建议 0~100 | +| `message` | string | 否 | 当前状态说明 | +| `artifactsAvailable` | boolean | 否 | 是否已有可下载产物 | + +### 失败响应示例 + +```json +{ + "error": { + "code": "TASK_NOT_FOUND", + "message": "task not found" + } +} +``` + +### 状态码 + +- `200 OK` +- `404 Not Found` + +--- + +## 12. 查询任务产物接口 + +## 12.1 GET `/tasks/{taskId}/artifacts` + +### 作用 + +列出某个任务已经生成的所有产物。 + +### 成功响应示例 + +#### Markdown 任务示例 + +```json +[ + { + "artifactId": "artifact-1", + "fileName": "result.md", + "mimeType": "text/markdown", + "role": "primary", + "sizeBytes": 2048 + } +] +``` + +#### OCR PDF 任务示例 + +```json +[ + { + "artifactId": "artifact-2", + "fileName": "result.pdf", + "mimeType": "application/pdf", + "role": "primary", + "sizeBytes": 3145728 + } +] +``` + +### 字段说明 + +| 字段 | 类型 | 必填 | 说明 | +|---|---|---|---| +| `artifactId` | string | 是 | 产物标识 | +| `fileName` | string | 是 | 文件名 | +| `mimeType` | string | 是 | MIME 类型 | +| `role` | string | 是 | `primary` / `auxiliary` / `log` | +| `sizeBytes` | number | 否 | 文件大小 | + +### 约束 + +- 对 `markdown`,应至少存在一个 `role=primary` 且 `mimeType=text/markdown` 的产物 +- 对 `ocrpdf`,应至少存在一个 `role=primary` 且 `mimeType=application/pdf` 的产物 + +### 状态码 + +- `200 OK` +- `404 Not Found` + +--- + +## 13. 
产物下载接口 + +## 13.1 GET `/artifacts/{artifactId}/download` + +### 作用 + +下载指定产物文件。 + +### 响应 + +- 响应体为二进制文件流 +- `Content-Type` 应与产物 `mimeType` 一致 +- `Content-Disposition` 建议包含文件名 + +### 成功行为示例 + +- 下载 Markdown:`Content-Type: text/markdown` +- 下载 OCR PDF:`Content-Type: application/pdf` + +### 状态码 + +- `200 OK` +- `404 Not Found` + +--- + +## 14. 原始文件下载接口 + +## 14.1 GET `/files/{fileId}/download` + +### 作用 + +下载已上传但尚未处理的原始 PDF 文件。 + +### 响应 + +- 响应体为二进制文件流 +- `Content-Type: application/pdf` +- `Content-Disposition` 包含原始文件名 + +### 典型用途 + +- PC 管理面板中直接下载查看手机上传的原始 PDF +- 手机端重新获取已上传的文件 + +### 状态码 + +- `200 OK` +- `404 Not Found`(文件 ID 不存在或文件已从磁盘删除) + +--- + +## 15. 两类处理任务的差异说明 + +## 15.1 `processType=markdown` + +### 目标 + +把 PDF 处理为 Markdown 文档。 + +### 最低要求 + +- 至少返回一个 `.md` 主产物 + +### 可选附加产物 + +- 图片资源 +- 日志 +- JSON 中间结果 + +## 15.2 `processType=ocrpdf` + +### 目标 + +把 PDF 处理为 OCR 后的可搜索 PDF。 + +### 最低要求 + +- 至少返回一个 `.pdf` 主产物 + +### 可选附加产物 + +- 日志 +- 识别报告 + +--- + +## 16. 典型调用流程 + +## 16.1 实时图传流程 + +1. 手机发现并选择 PC 主机 +2. 手机调用 `/health` 确认支持 `stream` +3. 手机建立 `WS /stream` +4. 手机按抽帧策略发送 JPEG 帧 +5. PC 实时显示最新帧 + +## 16.2 文档处理流程 + +1. 手机本地生成 PDF +2. 手机 `POST /upload/pdf` +3. 手机获得 `fileId` +4. 手机 `POST /tasks/process` +5. 手机获得 `taskId` +6. 手机轮询 `GET /tasks/{taskId}` +7. 任务完成后,手机调用 `GET /tasks/{taskId}/artifacts` +8. 手机调用 `GET /artifacts/{artifactId}/download` +9. 手机保存、打开或分享结果 + +--- + +## 17. 第一阶段可接受的占位实现 + +如果当前目标只是让 Android 端和 PC 端先联调通,这一阶段允许: + +### 17.1 `markdown` 占位实现 + +- 收到 `processType=markdown` +- 直接生成一个示例 `.md` 文件 +- 任务短时间内进入 `completed` + +### 17.2 `ocrpdf` 占位实现 + +- 收到 `processType=ocrpdf` +- 直接复制输入 PDF 为新文件,或生成一个占位 PDF +- 任务短时间内进入 `completed` + +### 17.3 为什么允许这样做 + +这样可以先验证: + +- 接口字段是否稳定 +- Android 端状态流是否完整 +- 下载逻辑是否可用 +- 不同 `mimeType` 的本地处理是否正确 + +等这些都稳定后,再接入真实处理器更安全。 + +--- + +## 18. 
对执行者的约束说明 + +本节适用于任何执行这份接口文档的人或 AI。 + +### 18.1 必须遵守的约束 + +- 不要为 `markdown` 和 `ocrpdf` 设计两套独立任务协议 +- 不要让 Android 端依赖 PC 内部执行器实现细节 +- 不要把图传流直接作为正式文档处理输入 +- 不要把 PC 主动推送作为“结果回到手机”的唯一实现方式 + +### 18.2 优先级建议 + +如果执行资源有限,优先实现: + +1. `/health` +2. `/upload/pdf` +3. `/tasks/process` +4. `/tasks/{taskId}` +5. `/tasks/{taskId}/artifacts` +6. `/artifacts/{artifactId}/download` +7. `WS /stream` + +说明: + +- 如果当前主要目标是联调文档处理链路,可先暂缓图传 UI +- 如果当前主要目标是实时性验证,可先实现 `WS /stream` +- 但无论如何,统一处理接口契约应保持不变 + +--- + +## 19. 与 Android 端实现的对应关系 + +PC 接口与 Android 模块建议对应如下: + +| PC 接口 | Android 模块 | +|---|---| +| `/health` | discovery / server endpoint | +| `WS /stream` | stream client | +| `/upload/pdf` | upload client | +| `/tasks/process` | task client | +| `/tasks/{taskId}` | task polling logic | +| `/tasks/{taskId}/artifacts` | artifact query logic | +| `/artifacts/{artifactId}/download` | artifact download client | +| `/files/{fileId}/download` | raw file download client | + +--- + +## 20. 后续扩展预留 + +后续如果需要扩展,可在不破坏主契约的情况下增加: + +- 更多 `processType` +- 更多 `options` 字段 +- 任务取消接口 +- 批量任务接口 +- 任务日志查询接口 +- 结果 ZIP 打包下载接口 + +但第一阶段不建议过早加入这些扩展。 + +--- + +## 21. 一句话总结 + +这份接口规范的核心思想是: + +- **实时图传走一条轻量、低延迟链路** +- **文档处理走一条统一任务接口链路** +- **MinerU 与 OCRmyPDF 共用同一处理协议,只通过 `processType` 区分** +- **允许先用占位实现把联调跑通,再逐步接入真实处理器** diff --git a/requirements/requirements.md b/requirements/requirements.md new file mode 100644 index 0000000..940fac0 --- /dev/null +++ b/requirements/requirements.md @@ -0,0 +1,108 @@ +# FairScan + +> 此文档为项目需求文档 + +## 文件原有的离线扫描功能 + +- 相机实时预览、文档边缘检测、自动裁切 +- 页面编辑(裁切/旋转/滤镜/顺序调整) +- PDF/JPEG 导出 +- 多页扫描管理 + +## 手机网络图传功能 + +### 变成一个局域网内进行一定压缩广播的实时网络摄像头 + +- 手机端通过 WebSocket 将 JPEG 帧发送到 PC +- PC 端浏览器实时显示画面 +- 支持帧率控制(无限制 / 15fps / 10fps / 5fps) +- 丢帧策略:上一帧未发送完毕则丢弃当前帧,保证实时性 +- 连接状态显示(已连接/未连接/出错) + +#### 压缩力度可选 + +- **低质量**:最长边 640px,JPEG 质量 45,目标 8~12fps +- **均衡**:最长边 960px,JPEG 质量 60,目标 6~10fps(默认) +- **高质量**:最长边 1280px,JPEG 质量 75,目标 5~8fps + +## 支持将离线扫描出来的pdf,通过局域网wifi网络协议发送给pc主机 + +### 已实现的核心功能 + +#### 1. 
PDF 上传 +- 手机端在导出页可选择"仅传输到电脑" +- 通过 HTTP multipart/form-data 上传到 PC 服务器 `POST /upload/pdf` +- 上传进度与状态实时显示 +- 上传成功后返回 `fileId`,PC 端保存原始 PDF 到 `./uploads/` 目录 + +#### 2. 上传+处理 +- 上传后自动创建处理任务:`POST /tasks/process` +- 支持两种处理类型: + - **OCR PDF** (`processType=ocrpdf`) — 复制原始 PDF 作为"处理结果" + - **Markdown** (`processType=markdown`) — 生成模拟 `.md` 文件 +- 任务状态轮询:queued → processing (10% → 50% → 90%) → completed +- 处理完成后可下载产物 + +#### 3. PC 端管理面板 +- 浏览器访问 `/dashboard` 查看管理界面 +- 统计卡片:已上传文件数、处理任务数、排队中/处理中/已完成 +- 文件列表:显示已上传的 PDF,支持下载原始文件 +- 任务列表:显示所有处理任务,支持下载处理产物 +- 自动刷新(每 2 秒) +- 导航栏:可在图传预览页和管理面板间切换 + +### 所连接的wifi可自定义,可以显示出自己的IP和端口 + +- 设置页可配置 PC 主机地址和端口 +- 支持手动输入 IP 和端口 +- 显示当前手机 IP 地址 +- 通过 `GET /health` 测试连接 +- 局域网发现(mDNS/NSD)的占位代码已准备,待完整实现 + +### MinerU转成markdown便于数字化存储 ✅ 已实现 + +- `processType=markdown` 处理类型 +- 使用 MinerU `aio_do_parse()` 异步接口,pipeline 后端 +- `HF_HUB_OFFLINE=1` 使用本地缓存模型(规避 huggingface.co 不可达的问题) +- 输出产物:`.md` + `images/` + `{name}_result.zip`(ZIP 含 .md + images/) +- 手机端可通过任务管理面板查看状态并下载到指定目录 + +### 进行OCRmyPDF 转成双层pdf 📌 下一步 + +- `processType=ocrpdf` 处理类型 +- **当前**:使用 MinerU 生成 layout PDF(画布局框,非真正 OCR) +- **目标**:接入 `ocrmypdf` 库,生成可搜索双层 PDF +- 接口已预留,详见 `requirements/NEXT_STEPS.md` + +## PC 端服务器 + +基于 Python FastAPI,提供以下端点: + +| 端点 | 方法 | 功能 | +|------|------|------| +| `/health` | GET | 健康检查 | +| `/` | GET | 图传预览页面 | +| `/stream` | WS | 接收 JPEG 帧 | +| `/dashboard` | GET | 管理面板页面 | +| `/api/dashboard` | GET | 管理面板 JSON 数据 | +| `/upload/pdf` | POST | 上传 PDF(纯上传,不处理) | +| `/tasks/process` | POST | 创建处理任务 | +| `/tasks/{taskId}` | GET | 查询任务状态 | +| `/tasks/{taskId}/artifacts` | GET | 查询任务产物列表 | +| `/artifacts/{artifactId}/download` | GET | 下载处理产物 | +| `/files/{fileId}/download` | GET | 下载已上传的原始文件 | + +### 手机端任务管理面板 ✅ 已实现 + +- 导出页底部 `TaskPanelSection`:显示所有上传处理任务 +- 任务状态:排队中 / 处理中(进度条) / 已完成 / 失败 +- 2 秒间隔后台轮询,完成后自动停止 +- 已完成任务:选择下载目录(SAF)→ 下载产物 → 打开文件 +- Markdown 任务默认下载 ZIP(.md + images/),OCR PDF 任务下载 PDF + +## 后续待实现 + +- **P0 OCRmyPDF 真实接入**:用 `ocrmypdf` 库替换 MinerU layout 
PDF,产出可搜索双层 PDF +- **P0 局域网自动发现**:mDNS/NSD 自动发现 PC 服务 +- **处理结果自动下载**:配置开启后自动下载处理结果 +- **图传延迟/帧率实时显示**