ms ocr file processor works

3
Timo Bryant 2024-01-04 11:12:26 +01:00
parent accdfbca67
commit 9ea725fc36
7 changed files with 101 additions and 7 deletions

View File

@ -7,10 +7,25 @@ repositories {
}
dependencies {
    // Declares every named module under `:libraries` as an `implementation` dependency.
    fun addProjects(vararg names: String) {
        for (libraryName in names) {
            implementation(project(":libraries:$libraryName"))
        }
    }

    // Library modules of this build.
    addProjects(
        "assetmanager",
        "core-api",
        "textprocessing",
        "httpClient",
        "tui",
    )

    // Aurora artifacts, all on the same version.
    implementation("org.pushing-pixels:aurora-theming:1.3.0")
    implementation("org.pushing-pixels:aurora-component:1.3.0")
    implementation("org.pushing-pixels:aurora-window:1.3.0")

    implementation(compose.desktop.currentOs)

    // ZoomImage artifacts, both on the same version.
    implementation("io.github.panpf.zoomimage:zoomimage-compose:1.0.0-beta11")
    implementation("io.github.panpf.zoomimage:zoomimage-compose-desktop:1.0.0-beta11")
}

View File

@ -39,6 +39,14 @@ import com.github.panpf.zoomimage.ZoomImage
import com.github.panpf.zoomimage.compose.ZoomState
import com.github.panpf.zoomimage.compose.rememberZoomState
import com.github.panpf.zoomimage.compose.zoom.*
import de.itkl.assetmanager.assetManagerModule
import de.itkl.core_api.coreApiModule
import de.itkl.httpClient.clients.MsOcr
import de.itkl.httpClient.httpClientModule
import de.itkl.textprocessing.CorpusFactory
import de.itkl.textprocessing.Document
import de.itkl.textprocessing.textProcessingModule
import de.itkl.tui.tuiModule
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.withContext
import org.pushingpixels.aurora.theming.auroraBackground
@ -49,12 +57,38 @@ import org.pushingpixels.aurora.window.auroraApplication
import java.io.File
import java.io.IOException
import io.github.oshai.kotlinlogging.KotlinLogging
import kotlinx.coroutines.runBlocking
import org.koin.core.component.KoinComponent
import org.koin.core.component.inject
import org.koin.core.context.startKoin
import com.github.panpf.zoomimage.util.Logger as ZoomLogger
private val Log = KotlinLogging.logger { }
/**
 * Koin-aware entry point that runs the injected [MsOcr] over a fixed test document.
 */
class DocumentViewer : KoinComponent {
    /**
     * Loads the corpus at `assets/xs-reg`, picks the document `00001.jpg`
     * and hands the Koin-provided [MsOcr] instance to its `process` call.
     */
    suspend fun loadTestDocument() {
        // Lazy delegate: the instance is only looked up when it is passed to process().
        val ocrExtractor: MsOcr by inject()
        CorpusFactory()
            .load("assets/xs-reg")
            .document("00001.jpg")
            .process(ocrExtractor)
    }
}
fun main() = auroraApplication {
startKoin {
modules(
coreApiModule,
textProcessingModule,
tuiModule,
assetManagerModule,
httpClientModule)
}
runBlocking {
DocumentViewer().loadTestDocument()
}
val state = rememberWindowState(
placement = WindowPlacement.Floating,
position = WindowPosition.Aligned(Alignment.Center),

View File

@ -4,6 +4,8 @@ import de.itkl.assetmanager.interfaces.AssetManager
import de.itkl.assetmanager.interfaces.Assets
import de.itkl.assetmanager.interfaces.Project
import de.itkl.assetmanager.interfaces.ProjectManager
import de.itkl.core_api.interfaces.Resource
import de.itkl.core_api.interfaces.ResourceFactory
import io.github.oshai.kotlinlogging.KotlinLogging
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.withContext
@ -42,8 +44,17 @@ class FilesystemProject(
override val documentNames: List<String>
) : Project, KoinComponent {
private val basePath = Paths.get(name)
private val assetManager: AssetManager by inject()
override suspend fun assetsOf(documentName: String): Assets {
private val resourceFactory: ResourceFactory by inject()
override suspend fun assets(documentName: String): Assets {
return assetManager.assets(documentName)
}
/**
 * Logs the request at debug level, then returns the [Resource] that
 * [resourceFactory] creates for [name] resolved against [basePath].
 */
override suspend fun resource(name: String): Resource? {
    Log.debug { "Project: opening resource of name $name" }
    val location = basePath.resolve(name)
    return resourceFactory.file(location)
}
}

View File

@ -1,5 +1,7 @@
package de.itkl.assetmanager.interfaces
import de.itkl.core_api.interfaces.Resource
/**
* A set of documents. Each can hold its own assets
*/
@ -7,5 +9,7 @@ interface Project {
val name: String
val displayName: String
val documentNames: List<String>
suspend fun assetsOf(documentName: String): Assets
suspend fun assets(documentName: String): Assets
suspend fun resource(name: String): Resource?
}

View File

@ -6,11 +6,15 @@ import org.koin.core.component.KoinComponent
import org.koin.core.component.inject
import java.io.File
import java.nio.file.Path
import java.nio.file.Paths
class ResourceFactory : KoinComponent {
private val progressBarFactory by inject<ProgressBarFactory>()
/** Converts the string [path] to a [Path] and delegates to the [Path] overload. */
fun file(path: String): Resource = file(Paths.get(path))
/** Converts [path] to a [File] and delegates to the [File] overload. */
fun file(path: Path): Resource = file(path.toFile())

View File

@ -1,6 +1,7 @@
package de.itkl.httpClient.clients
import de.itkl.core_api.dtos.MsOcrResponse
import de.itkl.core_api.interfaces.FileProcessor
import de.itkl.core_api.interfaces.Resource
import io.github.oshai.kotlinlogging.KotlinLogging
import io.ktor.client.*
@ -8,11 +9,16 @@ import io.ktor.client.call.*
import io.ktor.client.request.*
import io.ktor.client.statement.*
import io.ktor.http.*
import kotlinx.serialization.json.Json
import org.koin.core.component.KoinComponent
import org.koin.core.component.inject
import java.io.File
import java.nio.file.Path
import kotlin.io.path.nameWithoutExtension
import kotlin.io.path.writeText
private val Log = KotlinLogging.logger { }
class MsOcr: KoinComponent {
class MsOcr: KoinComponent, FileProcessor {
private val httpClient: HttpClient by inject()
suspend fun ocr(resource: Resource): MsOcrResponse {
@ -25,7 +31,18 @@ class MsOcr: KoinComponent {
contentType(resource.contentType)
setBody(resource.read())
}
println(response.bodyAsText())
return response.body()
}
/**
 * Returns the location of the OCR result that belongs to [path]: a sibling file
 * whose name is the input name without its extension plus `.ms-ocr.json`.
 *
 * `resolveSibling` is used instead of `parent.resolve` because `parent` is null
 * for a single-segment relative path (e.g. `00001.jpg`), which used to end in a
 * NullPointerException. In that case the bare result file name is returned.
 */
override fun willProduce(path: Path): Path {
    return path.resolveSibling(path.nameWithoutExtension + ".ms-ocr.json")
}
/**
 * Runs OCR on [resource], serializes the [MsOcrResponse] to JSON and writes it to
 * the file named by [willProduce] for the resource's path.
 *
 * @return the file the JSON was written to
 * @throws IllegalArgumentException if [resource] has no path, because the output
 *   location is derived from it
 */
override suspend fun process(resource: Resource): File {
    // Validate before calling ocr(): without a path the result could not be stored anyway.
    val sourcePath = requireNotNull(resource.path) {
        "Cannot store the OCR result: the resource has no path to derive the output file from"
    }
    val destination = willProduce(sourcePath)
    val jsonString = Json.encodeToString(MsOcrResponse.serializer(), ocr(resource))
    destination.writeText(jsonString)
    return destination.toFile()
}
}

View File

@ -3,26 +3,35 @@ package de.itkl.textprocessing
import de.itkl.assetmanager.interfaces.Project
import de.itkl.assetmanager.interfaces.ProjectManager
import de.itkl.core_api.interfaces.FileProcessor
import de.itkl.core_api.interfaces.ResourceFactory
import de.itkl.core_api.interfaces.data.Processable
import io.github.oshai.kotlinlogging.KotlinLogging
import org.koin.core.component.KoinComponent
import org.koin.core.component.inject
import org.koin.java.KoinJavaComponent.inject
import java.nio.file.Paths
private val Log = KotlinLogging.logger { }
class CorpusFactory : KoinComponent {
private val projectManager: ProjectManager by inject()
/**
 * Loads the project called [name] through [projectManager] and wraps it in a [Corpus].
 *
 * Logs the absolute path that [name] resolves to (info) before loading and the
 * document names of the resulting corpus (debug) afterwards.
 */
suspend fun load(name: String): Corpus {
    Log.info { "Open corpus at ${Paths.get(name).toAbsolutePath()}" }
    // `also` keeps the corpus as the returned value while logging it as a side effect.
    return Corpus(projectManager.load(name)).also { corpus ->
        Log.debug { "Found documents in corpus: ${corpus.documentNames.joinToString("\n")}" }
    }
}
class Corpus(private val project: Project): Processable {
}
/**
 * View over a [Project] that exposes its display name, its document names
 * and access to single documents.
 */
class Corpus(private val project: Project): Processable, KoinComponent {
    val displayName get() = project.displayName
    val documentNames get() = project.documentNames

    // Not referenced by any member of this class yet.
    private val resourceFactory: ResourceFactory by inject()

    /** Not implemented yet: always throws [NotImplementedError]. */
    override suspend fun process(fileProcessor: FileProcessor) {
        TODO("NEXT")
    }

    /**
     * Builds the [Document] called [name] from the project resource of the same name.
     *
     * @throws IllegalStateException if the project returns no resource for [name]
     */
    suspend fun document(name: String): Document {
        val resource = checkNotNull(project.resource(name)) {
            "Corpus '$displayName' has no resource named '$name'"
        }
        return Document(name, listOf(resource))
    }
}