From 9ea725fc3660280bc3176c3bf9bbedfce4e08bc5 Mon Sep 17 00:00:00 2001 From: Timo Bryant Date: Thu, 4 Jan 2024 11:12:26 +0100 Subject: [PATCH] ms ocr file processor works ---
Review notes (commentary only; this area between the separator and the diffstat is not part of the commit):

What the patch does, as far as the hunks below show:
  * apps/documentViewer/build.gradle.kts: adds a local addProjects(vararg names) helper that calls
    implementation(project(":libraries:$it")) for assetmanager, core-api, textprocessing, httpClient and tui.
  * DocumentViewer.kt: adds class DocumentViewer (KoinComponent) whose loadTestDocument() loads the corpus
    "assets/xs-reg", takes document "00001.jpg" and passes an injected MsOcr to document.process(...).
    main() now calls startKoin with five modules and runs loadTestDocument() via runBlocking.
  * FilesystemProject / Project: assetsOf(documentName) is renamed to assets(documentName); a new
    resource(name): Resource? is added, implemented as resourceFactory.file(basePath.resolve(name)) with
    basePath = Paths.get(name-of-project).
  * ResourceFactory: adds file(path: String), delegating to file(Paths.get(path)).
  * MsOcr: now also implements FileProcessor. willProduce(path) maps a path to a sibling named
    nameWithoutExtension + ".ms-ocr.json"; process(resource) calls ocr(resource), serializes the result with
    Json.encodeToString(MsOcrResponse.serializer(), ...), writes it to willProduce(resource.path!!) and returns
    that file. The println(response.bodyAsText()) debug output is removed.
  * Corpus.kt: CorpusFactory.load logs the absolute corpus path and the document names; Corpus.document(name)
    now returns Document(name, listOf(project.resource(name)!!)) instead of TODO().

Points to address before or right after merging:
  1. NOTE(review): startKoin { ... } and runBlocking { ... } are placed inside the auroraApplication { } lambda,
     next to rememberWindowState(...). That lambda looks like a @Composable scope - TODO confirm. If so, both
     calls would run again on every recomposition and runBlocking would block it; consider starting Koin
     before auroraApplication and loading the document from a LaunchedEffect.
  2. MsOcr.willProduce uses path.parent.resolve(...). java.nio.file.Path.getParent() returns null for a path
     without a parent (e.g. a bare file name), which would throw here; path.resolveSibling(...) avoids that.
  3. MsOcr.process evaluates resource.path!! only after ocr(resource) has returned, so a resource without a
     path fails with a NullPointerException after the OCR request was already made. Checking the path first
     (requireNotNull with a message) would fail fast. destination.writeText(...) is a blocking write inside a
     suspend function.
  4. Corpus.document uses project.resource(name)!!. The FilesystemProject implementation in this patch always
     returns resourceFactory.file(...), so the nullable return type and the !! are not exercised by it;
     other Project implementations are not visible here.
  5. Corpus gains KoinComponent and an injected resourceFactory that no visible line of the hunk reads.
  6. Project.assetsOf is renamed to assets. No call-site updates are part of this patch - presumably there
     are none yet; verify against the repository.
  7. loadTestDocument hard-codes "assets/xs-reg" and "00001.jpg"; it reads as a temporary smoke test.

Caveat about this copy of the patch: line breaks are collapsed and some tokens appear to be missing (for
example "documentNames: List" and "by inject()" carry no type argument, and the removed/added
"class Corpus" lines are fused). Presumably generic arguments were stripped during extraction - do not apply
this text as-is; regenerate the patch from the commit instead.

 apps/documentViewer/build.gradle.kts | 15 ++++++++ .../de/itkl/documentViewer/DocumentViewer.kt | 34 +++++++++++++++++++ .../FilesystemProjectManager.kt | 13 ++++++- .../itkl/assetmanager/interfaces/Project.kt | 6 +++- .../core_api/interfaces/ResourceFactory.kt | 4 +++ .../de/itkl/httpClient/clients/MsOcr.kt | 21 ++++++++++-- .../kotlin/de/itkl/textprocessing/Corpus.kt | 15 ++++++-- 7 files changed, 101 insertions(+), 7 deletions(-) diff --git a/apps/documentViewer/build.gradle.kts b/apps/documentViewer/build.gradle.kts index 25e7f04..c98b142 100644 --- a/apps/documentViewer/build.gradle.kts +++ b/apps/documentViewer/build.gradle.kts @@ -7,10 +7,25 @@ repositories { } dependencies { + fun addProjects(vararg names: String) { + names.forEach { + implementation(project(":libraries:$it")) + } + } + + addProjects( + "assetmanager", + "core-api", + "textprocessing", + "httpClient", + "tui", + ) + implementation("org.pushing-pixels:aurora-theming:1.3.0") implementation("org.pushing-pixels:aurora-component:1.3.0") implementation("org.pushing-pixels:aurora-window:1.3.0") implementation(compose.desktop.currentOs) implementation("io.github.panpf.zoomimage:zoomimage-compose:1.0.0-beta11") implementation("io.github.panpf.zoomimage:zoomimage-compose-desktop:1.0.0-beta11") + } \ No newline at end of file diff --git a/apps/documentViewer/src/main/kotlin/de/itkl/documentViewer/DocumentViewer.kt b/apps/documentViewer/src/main/kotlin/de/itkl/documentViewer/DocumentViewer.kt index f8a6c2f..5987092 100644 --- a/apps/documentViewer/src/main/kotlin/de/itkl/documentViewer/DocumentViewer.kt +++ b/apps/documentViewer/src/main/kotlin/de/itkl/documentViewer/DocumentViewer.kt @@ -39,6 +39,14 @@ import com.github.panpf.zoomimage.ZoomImage import com.github.panpf.zoomimage.compose.ZoomState import 
com.github.panpf.zoomimage.compose.rememberZoomState import com.github.panpf.zoomimage.compose.zoom.* +import de.itkl.assetmanager.assetManagerModule +import de.itkl.core_api.coreApiModule +import de.itkl.httpClient.clients.MsOcr +import de.itkl.httpClient.httpClientModule +import de.itkl.textprocessing.CorpusFactory +import de.itkl.textprocessing.Document +import de.itkl.textprocessing.textProcessingModule +import de.itkl.tui.tuiModule import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.withContext import org.pushingpixels.aurora.theming.auroraBackground @@ -49,12 +57,38 @@ import org.pushingpixels.aurora.window.auroraApplication import java.io.File import java.io.IOException import io.github.oshai.kotlinlogging.KotlinLogging +import kotlinx.coroutines.runBlocking +import org.koin.core.component.KoinComponent +import org.koin.core.component.inject +import org.koin.core.context.startKoin import com.github.panpf.zoomimage.util.Logger as ZoomLogger private val Log = KotlinLogging.logger { } +class DocumentViewer : KoinComponent { + suspend fun loadTestDocument() { + val corpus = CorpusFactory().load("assets/xs-reg") + val document = corpus.document("00001.jpg") + val ocrExtractor: MsOcr by inject() + document.process(ocrExtractor) + } +} + fun main() = auroraApplication { + startKoin { + modules( + coreApiModule, + textProcessingModule, + tuiModule, + assetManagerModule, + httpClientModule) + } + + runBlocking { + DocumentViewer().loadTestDocument() + } + val state = rememberWindowState( placement = WindowPlacement.Floating, position = WindowPosition.Aligned(Alignment.Center), diff --git a/libraries/assetmanager/src/main/kotlin/de/itkl/assetmanager/implementation/FilesystemProjectManager.kt b/libraries/assetmanager/src/main/kotlin/de/itkl/assetmanager/implementation/FilesystemProjectManager.kt index cbb1946..671cbff 100644 --- a/libraries/assetmanager/src/main/kotlin/de/itkl/assetmanager/implementation/FilesystemProjectManager.kt +++ 
b/libraries/assetmanager/src/main/kotlin/de/itkl/assetmanager/implementation/FilesystemProjectManager.kt @@ -4,6 +4,8 @@ import de.itkl.assetmanager.interfaces.AssetManager import de.itkl.assetmanager.interfaces.Assets import de.itkl.assetmanager.interfaces.Project import de.itkl.assetmanager.interfaces.ProjectManager +import de.itkl.core_api.interfaces.Resource +import de.itkl.core_api.interfaces.ResourceFactory import io.github.oshai.kotlinlogging.KotlinLogging import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.withContext @@ -42,8 +44,17 @@ class FilesystemProject( override val documentNames: List ) : Project, KoinComponent { + private val basePath = Paths.get(name) + private val assetManager: AssetManager by inject() - override suspend fun assetsOf(documentName: String): Assets { + private val resourceFactory: ResourceFactory by inject() + override suspend fun assets(documentName: String): Assets { return assetManager.assets(documentName) } + + override suspend fun resource(name: String): Resource? { + Log.debug { "Project: opening resource of name $name" } + + return resourceFactory.file(basePath.resolve(name)) + } } diff --git a/libraries/assetmanager/src/main/kotlin/de/itkl/assetmanager/interfaces/Project.kt b/libraries/assetmanager/src/main/kotlin/de/itkl/assetmanager/interfaces/Project.kt index cbd0455..20cbf6f 100644 --- a/libraries/assetmanager/src/main/kotlin/de/itkl/assetmanager/interfaces/Project.kt +++ b/libraries/assetmanager/src/main/kotlin/de/itkl/assetmanager/interfaces/Project.kt @@ -1,5 +1,7 @@ package de.itkl.assetmanager.interfaces +import de.itkl.core_api.interfaces.Resource + /** * A set of documents. Each can hold its own assets */ @@ -7,5 +9,7 @@ interface Project { val name: String val displayName: String val documentNames: List - suspend fun assetsOf(documentName: String): Assets + suspend fun assets(documentName: String): Assets + + suspend fun resource(name: String): Resource? 
} \ No newline at end of file diff --git a/libraries/core-api/src/main/kotlin/de/itkl/core_api/interfaces/ResourceFactory.kt b/libraries/core-api/src/main/kotlin/de/itkl/core_api/interfaces/ResourceFactory.kt index 2ac8201..7853384 100644 --- a/libraries/core-api/src/main/kotlin/de/itkl/core_api/interfaces/ResourceFactory.kt +++ b/libraries/core-api/src/main/kotlin/de/itkl/core_api/interfaces/ResourceFactory.kt @@ -6,11 +6,15 @@ import org.koin.core.component.KoinComponent import org.koin.core.component.inject import java.io.File import java.nio.file.Path +import java.nio.file.Paths class ResourceFactory : KoinComponent { private val progressBarFactory by inject() + fun file(path: String): Resource { + return file(Paths.get(path)) + } fun file(path: Path): Resource { return file(path.toFile()) } diff --git a/libraries/httpClient/src/main/kotlin/de/itkl/httpClient/clients/MsOcr.kt b/libraries/httpClient/src/main/kotlin/de/itkl/httpClient/clients/MsOcr.kt index 1f1233d..a27e3d5 100644 --- a/libraries/httpClient/src/main/kotlin/de/itkl/httpClient/clients/MsOcr.kt +++ b/libraries/httpClient/src/main/kotlin/de/itkl/httpClient/clients/MsOcr.kt @@ -1,6 +1,7 @@ package de.itkl.httpClient.clients import de.itkl.core_api.dtos.MsOcrResponse +import de.itkl.core_api.interfaces.FileProcessor import de.itkl.core_api.interfaces.Resource import io.github.oshai.kotlinlogging.KotlinLogging import io.ktor.client.* @@ -8,11 +9,16 @@ import io.ktor.client.call.* import io.ktor.client.request.* import io.ktor.client.statement.* import io.ktor.http.* +import kotlinx.serialization.json.Json import org.koin.core.component.KoinComponent import org.koin.core.component.inject +import java.io.File +import java.nio.file.Path +import kotlin.io.path.nameWithoutExtension +import kotlin.io.path.writeText private val Log = KotlinLogging.logger { } -class MsOcr: KoinComponent { +class MsOcr: KoinComponent, FileProcessor { private val httpClient: HttpClient by inject() suspend fun ocr(resource: 
Resource): MsOcrResponse { @@ -25,7 +31,18 @@ class MsOcr: KoinComponent { contentType(resource.contentType) setBody(resource.read()) } - println(response.bodyAsText()) return response.body() } + + override fun willProduce(path: Path): Path { + return path.parent.resolve(path.nameWithoutExtension + ".ms-ocr.json") + } + + override suspend fun process(resource: Resource): File { + val result = ocr(resource) + val jsonString = Json.encodeToString(MsOcrResponse.serializer(), result) + val destination = willProduce(resource.path!!) + destination.writeText(jsonString) + return destination.toFile() + } } \ No newline at end of file diff --git a/libraries/textprocessing/src/main/kotlin/de/itkl/textprocessing/Corpus.kt b/libraries/textprocessing/src/main/kotlin/de/itkl/textprocessing/Corpus.kt index 62de6cc..834c478 100644 --- a/libraries/textprocessing/src/main/kotlin/de/itkl/textprocessing/Corpus.kt +++ b/libraries/textprocessing/src/main/kotlin/de/itkl/textprocessing/Corpus.kt @@ -3,26 +3,35 @@ package de.itkl.textprocessing import de.itkl.assetmanager.interfaces.Project import de.itkl.assetmanager.interfaces.ProjectManager import de.itkl.core_api.interfaces.FileProcessor +import de.itkl.core_api.interfaces.ResourceFactory import de.itkl.core_api.interfaces.data.Processable +import io.github.oshai.kotlinlogging.KotlinLogging import org.koin.core.component.KoinComponent import org.koin.core.component.inject import org.koin.java.KoinJavaComponent.inject +import java.nio.file.Paths +private val Log = KotlinLogging.logger { } class CorpusFactory : KoinComponent { private val projectManager: ProjectManager by inject() suspend fun load(name: String): Corpus { - return Corpus(projectManager.load(name)) + Log.info { "Open corpus at ${Paths.get(name).toAbsolutePath()}" } + return Corpus(projectManager.load(name)).apply { + Log.debug { "Found documents in corpus: ${this.documentNames.joinToString("\n")}" } + } } } -class 
Corpus(private val project: Project): Processable, KoinComponent { val displayName get() = project.displayName val documentNames get() = project.documentNames + + private val resourceFactory: ResourceFactory by inject() + override suspend fun process(fileProcessor: FileProcessor) { TODO("NEXT") } suspend fun document(name: String): Document { - TODO() + return Document(name, listOf(project.resource(name)!!)) } } \ No newline at end of file