ms ocr file processor works

3
Timo Bryant 2024-01-04 11:12:26 +01:00
parent accdfbca67
commit 9ea725fc36
7 changed files with 101 additions and 7 deletions

View File

@ -7,10 +7,25 @@ repositories {
} }
dependencies { dependencies {
// Adds every named module below `:libraries` as an `implementation` dependency,
// so the list of in-repo libraries can be kept as plain names.
fun addProjects(vararg names: String) {
    for (library in names) {
        implementation(project(":libraries:$library"))
    }
}

// In-repo libraries this module depends on.
addProjects(
    "assetmanager",
    "core-api",
    "textprocessing",
    "httpClient",
    "tui",
)
implementation("org.pushing-pixels:aurora-theming:1.3.0") implementation("org.pushing-pixels:aurora-theming:1.3.0")
implementation("org.pushing-pixels:aurora-component:1.3.0") implementation("org.pushing-pixels:aurora-component:1.3.0")
implementation("org.pushing-pixels:aurora-window:1.3.0") implementation("org.pushing-pixels:aurora-window:1.3.0")
implementation(compose.desktop.currentOs) implementation(compose.desktop.currentOs)
implementation("io.github.panpf.zoomimage:zoomimage-compose:1.0.0-beta11") implementation("io.github.panpf.zoomimage:zoomimage-compose:1.0.0-beta11")
implementation("io.github.panpf.zoomimage:zoomimage-compose-desktop:1.0.0-beta11") implementation("io.github.panpf.zoomimage:zoomimage-compose-desktop:1.0.0-beta11")
} }

View File

@ -39,6 +39,14 @@ import com.github.panpf.zoomimage.ZoomImage
import com.github.panpf.zoomimage.compose.ZoomState import com.github.panpf.zoomimage.compose.ZoomState
import com.github.panpf.zoomimage.compose.rememberZoomState import com.github.panpf.zoomimage.compose.rememberZoomState
import com.github.panpf.zoomimage.compose.zoom.* import com.github.panpf.zoomimage.compose.zoom.*
import de.itkl.assetmanager.assetManagerModule
import de.itkl.core_api.coreApiModule
import de.itkl.httpClient.clients.MsOcr
import de.itkl.httpClient.httpClientModule
import de.itkl.textprocessing.CorpusFactory
import de.itkl.textprocessing.Document
import de.itkl.textprocessing.textProcessingModule
import de.itkl.tui.tuiModule
import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.withContext import kotlinx.coroutines.withContext
import org.pushingpixels.aurora.theming.auroraBackground import org.pushingpixels.aurora.theming.auroraBackground
@ -49,12 +57,38 @@ import org.pushingpixels.aurora.window.auroraApplication
import java.io.File import java.io.File
import java.io.IOException import java.io.IOException
import io.github.oshai.kotlinlogging.KotlinLogging import io.github.oshai.kotlinlogging.KotlinLogging
import kotlinx.coroutines.runBlocking
import org.koin.core.component.KoinComponent
import org.koin.core.component.inject
import org.koin.core.context.startKoin
import com.github.panpf.zoomimage.util.Logger as ZoomLogger import com.github.panpf.zoomimage.util.Logger as ZoomLogger
private val Log = KotlinLogging.logger { } private val Log = KotlinLogging.logger { }
/**
 * Koin-aware entry point used to exercise the OCR pipeline on a fixed sample document.
 */
class DocumentViewer : KoinComponent {
    /**
     * Loads the corpus at `assets/xs-reg`, picks the document `00001.jpg` and hands it
     * to the [MsOcr] instance obtained from Koin for processing.
     */
    suspend fun loadTestDocument() {
        // Lazy delegate: the MsOcr instance is only resolved when it is first read below.
        val ocrExtractor by inject<MsOcr>()
        val document = CorpusFactory()
            .load("assets/xs-reg")
            .document("00001.jpg")
        document.process(ocrExtractor)
    }
}
fun main() = auroraApplication { fun main() = auroraApplication {
startKoin {
modules(
coreApiModule,
textProcessingModule,
tuiModule,
assetManagerModule,
httpClientModule)
}
runBlocking {
DocumentViewer().loadTestDocument()
}
val state = rememberWindowState( val state = rememberWindowState(
placement = WindowPlacement.Floating, placement = WindowPlacement.Floating,
position = WindowPosition.Aligned(Alignment.Center), position = WindowPosition.Aligned(Alignment.Center),

View File

@ -4,6 +4,8 @@ import de.itkl.assetmanager.interfaces.AssetManager
import de.itkl.assetmanager.interfaces.Assets import de.itkl.assetmanager.interfaces.Assets
import de.itkl.assetmanager.interfaces.Project import de.itkl.assetmanager.interfaces.Project
import de.itkl.assetmanager.interfaces.ProjectManager import de.itkl.assetmanager.interfaces.ProjectManager
import de.itkl.core_api.interfaces.Resource
import de.itkl.core_api.interfaces.ResourceFactory
import io.github.oshai.kotlinlogging.KotlinLogging import io.github.oshai.kotlinlogging.KotlinLogging
import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.withContext import kotlinx.coroutines.withContext
@ -42,8 +44,17 @@ class FilesystemProject(
override val documentNames: List<String> override val documentNames: List<String>
) : Project, KoinComponent { ) : Project, KoinComponent {
private val basePath = Paths.get(name)
private val assetManager: AssetManager by inject() private val assetManager: AssetManager by inject()
override suspend fun assetsOf(documentName: String): Assets { private val resourceFactory: ResourceFactory by inject()
override suspend fun assets(documentName: String): Assets {
return assetManager.assets(documentName) return assetManager.assets(documentName)
} }
/**
 * Opens the resource called [name] relative to this project's base path.
 *
 * @param name path of the resource, resolved against the project's base path.
 * @return the resource produced by the resource factory for the resolved path.
 */
override suspend fun resource(name: String): Resource? {
    Log.debug { "Project: opening resource of name $name" }
    val factory = resourceFactory
    val location = basePath.resolve(name)
    return factory.file(location)
}
} }

View File

@ -1,5 +1,7 @@
package de.itkl.assetmanager.interfaces package de.itkl.assetmanager.interfaces
import de.itkl.core_api.interfaces.Resource
/** /**
* A set of documents. Each can hold its own assets * A set of documents. Each can hold its own assets
*/ */
@ -7,5 +9,7 @@ interface Project {
val name: String val name: String
val displayName: String val displayName: String
val documentNames: List<String> val documentNames: List<String>
suspend fun assetsOf(documentName: String): Assets suspend fun assets(documentName: String): Assets
suspend fun resource(name: String): Resource?
} }

View File

@ -6,11 +6,15 @@ import org.koin.core.component.KoinComponent
import org.koin.core.component.inject import org.koin.core.component.inject
import java.io.File import java.io.File
import java.nio.file.Path import java.nio.file.Path
import java.nio.file.Paths
class ResourceFactory : KoinComponent { class ResourceFactory : KoinComponent {
private val progressBarFactory by inject<ProgressBarFactory>() private val progressBarFactory by inject<ProgressBarFactory>()
/**
 * Convenience overload: converts the textual [path] to a [Path] and delegates
 * to the [Path]-based `file` overload.
 */
fun file(path: String): Resource = file(Paths.get(path))
fun file(path: Path): Resource { fun file(path: Path): Resource {
return file(path.toFile()) return file(path.toFile())
} }

View File

@ -1,6 +1,7 @@
package de.itkl.httpClient.clients package de.itkl.httpClient.clients
import de.itkl.core_api.dtos.MsOcrResponse import de.itkl.core_api.dtos.MsOcrResponse
import de.itkl.core_api.interfaces.FileProcessor
import de.itkl.core_api.interfaces.Resource import de.itkl.core_api.interfaces.Resource
import io.github.oshai.kotlinlogging.KotlinLogging import io.github.oshai.kotlinlogging.KotlinLogging
import io.ktor.client.* import io.ktor.client.*
@ -8,11 +9,16 @@ import io.ktor.client.call.*
import io.ktor.client.request.* import io.ktor.client.request.*
import io.ktor.client.statement.* import io.ktor.client.statement.*
import io.ktor.http.* import io.ktor.http.*
import kotlinx.serialization.json.Json
import org.koin.core.component.KoinComponent import org.koin.core.component.KoinComponent
import org.koin.core.component.inject import org.koin.core.component.inject
import java.io.File
import java.nio.file.Path
import kotlin.io.path.nameWithoutExtension
import kotlin.io.path.writeText
private val Log = KotlinLogging.logger { } private val Log = KotlinLogging.logger { }
class MsOcr: KoinComponent { class MsOcr: KoinComponent, FileProcessor {
private val httpClient: HttpClient by inject() private val httpClient: HttpClient by inject()
suspend fun ocr(resource: Resource): MsOcrResponse { suspend fun ocr(resource: Resource): MsOcrResponse {
@ -25,7 +31,18 @@ class MsOcr: KoinComponent {
contentType(resource.contentType) contentType(resource.contentType)
setBody(resource.read()) setBody(resource.read())
} }
println(response.bodyAsText())
return response.body() return response.body()
} }
/**
 * Computes the location of the OCR result for [path]: a sibling of [path] whose
 * name is the file name without its last extension, followed by `.ms-ocr.json`.
 *
 * Uses [Path.resolveSibling] rather than `path.parent.resolve(...)` so that a path
 * without a parent (for example a bare relative file name) yields a bare output
 * name instead of failing on a null parent.
 *
 * @param path location of the input file.
 * @return location the OCR JSON for [path] is written to.
 */
override fun willProduce(path: Path): Path =
    path.resolveSibling(path.nameWithoutExtension + ".ms-ocr.json")
/**
 * Runs OCR on [resource] and stores the response as JSON at the location
 * reported by [willProduce] for the resource's path.
 *
 * @param resource input sent to the OCR service; its `path` must be non-null
 *   because the output location is derived from it.
 * @return the JSON file that was written.
 * @throws IllegalArgumentException if [resource] has no path.
 */
override suspend fun process(resource: Resource): File {
    // Validate first: a resource without a path can never be stored, so fail
    // before the OCR request is made rather than after it.
    val sourcePath = requireNotNull(resource.path) {
        "Cannot derive an output file: resource has no path"
    }
    val destination = willProduce(sourcePath)
    val result = ocr(resource)
    val jsonString = Json.encodeToString(MsOcrResponse.serializer(), result)
    // NOTE(review): writeText is a blocking call inside a suspend function;
    // consider running it on an I/O dispatcher.
    destination.writeText(jsonString)
    return destination.toFile()
}
} }

View File

@ -3,26 +3,35 @@ package de.itkl.textprocessing
import de.itkl.assetmanager.interfaces.Project import de.itkl.assetmanager.interfaces.Project
import de.itkl.assetmanager.interfaces.ProjectManager import de.itkl.assetmanager.interfaces.ProjectManager
import de.itkl.core_api.interfaces.FileProcessor import de.itkl.core_api.interfaces.FileProcessor
import de.itkl.core_api.interfaces.ResourceFactory
import de.itkl.core_api.interfaces.data.Processable import de.itkl.core_api.interfaces.data.Processable
import io.github.oshai.kotlinlogging.KotlinLogging
import org.koin.core.component.KoinComponent import org.koin.core.component.KoinComponent
import org.koin.core.component.inject import org.koin.core.component.inject
import org.koin.java.KoinJavaComponent.inject import org.koin.java.KoinJavaComponent.inject
import java.nio.file.Paths
private val Log = KotlinLogging.logger { }
class CorpusFactory : KoinComponent { class CorpusFactory : KoinComponent {
private val projectManager: ProjectManager by inject() private val projectManager: ProjectManager by inject()
suspend fun load(name: String): Corpus { suspend fun load(name: String): Corpus {
return Corpus(projectManager.load(name)) Log.info { "Open corpus at ${Paths.get(name).toAbsolutePath()}" }
return Corpus(projectManager.load(name)).apply {
Log.debug { "Found documents in corpus: ${this.documentNames.joinToString("\n")}" }
} }
} }
class Corpus(private val project: Project): Processable { }
class Corpus(private val project: Project): Processable, KoinComponent {
val displayName get() = project.displayName val displayName get() = project.displayName
val documentNames get() = project.documentNames val documentNames get() = project.documentNames
private val resourceFactory: ResourceFactory by inject()
override suspend fun process(fileProcessor: FileProcessor) { override suspend fun process(fileProcessor: FileProcessor) {
TODO("NEXT") TODO("NEXT")
} }
suspend fun document(name: String): Document { suspend fun document(name: String): Document {
TODO() return Document(name, listOf(project.resource(name)!!))
} }
} }