Compare commits
No commits in common. "5a3f4031d2fdcf806fb0ccb43ddf1ec858e1017b" and "9f3813a83abe26d9a0da9f3402ae0a233b100597" have entirely different histories.
5a3f4031d2
...
9f3813a83a
|
|
@ -1,4 +1,4 @@
|
||||||
.gradle
|
.gradle
|
||||||
build
|
build
|
||||||
.idea
|
.idea
|
||||||
/assets
|
assets
|
||||||
|
|
@ -7,5 +7,4 @@
|
||||||
start-page="docthor.md">
|
start-page="docthor.md">
|
||||||
|
|
||||||
<toc-element topic="docthor.md"/>
|
<toc-element topic="docthor.md"/>
|
||||||
<toc-element topic="Snippets.md"/>
|
|
||||||
</instance-profile>
|
</instance-profile>
|
||||||
|
|
@ -1,36 +0,0 @@
|
||||||
# Snippets
|
|
||||||
|
|
||||||
## Scale a Shape alongside ZoomImage
|
|
||||||
|
|
||||||
|
|
||||||
```kotlin
|
|
||||||
|
|
||||||
@Composable
|
|
||||||
fun shapes(zoomableState: ZoomableState) {
|
|
||||||
Box(modifier = Modifier.fillMaxSize()) {
|
|
||||||
val scaleX = zoomableState.transform.scaleX
|
|
||||||
val scaleY = zoomableState.transform.scaleY
|
|
||||||
Box(
|
|
||||||
modifier = Modifier
|
|
||||||
.offset { IntOffset(
|
|
||||||
((zoomableState.transform.offset.x + (288 * scaleX)) ).toInt(),
|
|
||||||
((zoomableState.transform.offset.y + (697 * scaleY)) ).toInt()
|
|
||||||
) }
|
|
||||||
.clip(RectangleShape)
|
|
||||||
.size(100.dp * scaleX)
|
|
||||||
.background(Color.Red)
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Scale a Canvas alongside ZoomImage
|
|
||||||
|
|
||||||
```kotlin
|
|
||||||
drawRect(
|
|
||||||
Color.Blue,
|
|
||||||
topLeft = zoomableState.transform.offset + (Offset(288 * zoomableState.transform.scaleX,697 * zoomableState.transform.scaleY)),
|
|
||||||
size = Size( (793 - 288)* zoomableState.transform.scaleX, (741 - 697) * zoomableState.transform.scaleY),
|
|
||||||
style = Stroke(width = 5f)
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
@ -11,15 +11,6 @@ Asset can be found under <path>memento:/mnt/wd/export/data</path>
|
||||||
<def title="PDF Renderer for Compose">
|
<def title="PDF Renderer for Compose">
|
||||||
<a href="https://github.com/GRizzi91/bouquet">bouquet</a>
|
<a href="https://github.com/GRizzi91/bouquet">bouquet</a>
|
||||||
</def>
|
</def>
|
||||||
<def title="Moko Resource">
|
|
||||||
<a href="https://github.com/icerockdev/moko-resources">Resource Management for Compose</a>
|
|
||||||
</def>
|
|
||||||
<def title="Aurora">
|
|
||||||
<a href="https://github.com/kirill-grouchnikov/aurora">Building modern, elegant and fast desktop Compose applications</a>
|
|
||||||
</def>
|
|
||||||
<def title="Zoomimage">
|
|
||||||
<a href="https://github.com/panpf/zoomimage">Zooming an Image</a>
|
|
||||||
</def>
|
|
||||||
</deflist>
|
</deflist>
|
||||||
|
|
||||||
## Modules - Libraries
|
## Modules - Libraries
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,3 @@
|
||||||
|
|
||||||
plugins {
|
plugins {
|
||||||
id("docthor.kotlin-application-conventions")
|
id("docthor.kotlin-application-conventions")
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,7 @@ class ComputeIdf : CliktCommand() {
|
||||||
.required()
|
.required()
|
||||||
|
|
||||||
override fun run() = runBlocking {
|
override fun run() = runBlocking {
|
||||||
TfIdfPipeline(force = false)
|
TfIdfPipeline(force = true)
|
||||||
.input(corpus)
|
.input(corpus)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,31 +0,0 @@
|
||||||
plugins {
|
|
||||||
id("org.jetbrains.compose") version "1.5.11"
|
|
||||||
}
|
|
||||||
|
|
||||||
repositories {
|
|
||||||
google()
|
|
||||||
}
|
|
||||||
|
|
||||||
dependencies {
    // Sibling library modules of this build, all referenced as ":libraries:<name>".
    listOf(
        "assetmanager",
        "core-api",
        "textprocessing",
        "httpClient",
        "tui",
    ).forEach { libraryName ->
        implementation(project(":libraries:$libraryName"))
    }

    // Aurora window/theming components.
    implementation("org.pushing-pixels:aurora-theming:1.3.0")
    implementation("org.pushing-pixels:aurora-component:1.3.0")
    implementation("org.pushing-pixels:aurora-window:1.3.0")

    // Compose Desktop runtime for the current operating system.
    implementation(compose.desktop.currentOs)

    // Zoomable image component.
    implementation("io.github.panpf.zoomimage:zoomimage-compose:1.0.0-beta11")
    implementation("io.github.panpf.zoomimage:zoomimage-compose-desktop:1.0.0-beta11")
}
|
|
||||||
|
|
@ -1,235 +0,0 @@
|
||||||
package de.itkl.documentViewer
|
|
||||||
|
|
||||||
import androidx.compose.foundation.*
|
|
||||||
import androidx.compose.foundation.layout.*
|
|
||||||
import androidx.compose.material.Text
|
|
||||||
import androidx.compose.runtime.*
|
|
||||||
import androidx.compose.ui.Alignment
|
|
||||||
import androidx.compose.ui.ExperimentalComposeUiApi
|
|
||||||
import androidx.compose.ui.Modifier
|
|
||||||
import androidx.compose.ui.draw.clip
|
|
||||||
import androidx.compose.ui.geometry.Offset
|
|
||||||
import androidx.compose.ui.geometry.Size
|
|
||||||
import androidx.compose.ui.graphics.Color
|
|
||||||
import androidx.compose.ui.graphics.ImageBitmap
|
|
||||||
import androidx.compose.ui.graphics.RectangleShape
|
|
||||||
import androidx.compose.ui.graphics.drawscope.Stroke
|
|
||||||
import androidx.compose.ui.graphics.painter.BitmapPainter
|
|
||||||
import androidx.compose.ui.graphics.painter.Painter
|
|
||||||
import androidx.compose.ui.layout.ContentScale
|
|
||||||
import androidx.compose.ui.res.loadImageBitmap
|
|
||||||
import androidx.compose.ui.unit.DpSize
|
|
||||||
import androidx.compose.ui.unit.IntOffset
|
|
||||||
import androidx.compose.ui.unit.dp
|
|
||||||
import androidx.compose.ui.window.WindowPlacement
|
|
||||||
import androidx.compose.ui.window.WindowPosition
|
|
||||||
import androidx.compose.ui.window.rememberWindowState
|
|
||||||
import com.github.panpf.zoomimage.ZoomImage
|
|
||||||
import com.github.panpf.zoomimage.compose.ZoomState
|
|
||||||
import com.github.panpf.zoomimage.compose.rememberZoomState
|
|
||||||
import com.github.panpf.zoomimage.compose.zoom.*
|
|
||||||
import de.itkl.assetmanager.assetManagerModule
|
|
||||||
import de.itkl.core_api.coreApiModule
|
|
||||||
import de.itkl.httpClient.clients.MsOcr
|
|
||||||
import de.itkl.httpClient.httpClientModule
|
|
||||||
import de.itkl.textprocessing.CorpusFactory
|
|
||||||
import de.itkl.textprocessing.Document
|
|
||||||
import de.itkl.textprocessing.OcrPage
|
|
||||||
import de.itkl.textprocessing.textProcessingModule
|
|
||||||
import de.itkl.tui.tuiModule
|
|
||||||
import kotlinx.coroutines.Dispatchers
|
|
||||||
import kotlinx.coroutines.withContext
|
|
||||||
import org.pushingpixels.aurora.theming.auroraBackground
|
|
||||||
import org.pushingpixels.aurora.theming.marinerSkin
|
|
||||||
import org.pushingpixels.aurora.window.AuroraWindow
|
|
||||||
import org.pushingpixels.aurora.window.AuroraWindowTitlePaneConfigurations
|
|
||||||
import org.pushingpixels.aurora.window.auroraApplication
|
|
||||||
import java.io.File
|
|
||||||
import java.io.IOException
|
|
||||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
|
||||||
import kotlinx.coroutines.runBlocking
|
|
||||||
import org.koin.core.component.KoinComponent
|
|
||||||
import org.koin.core.component.inject
|
|
||||||
import org.koin.core.context.startKoin
|
|
||||||
import com.github.panpf.zoomimage.util.Logger as ZoomLogger
|
|
||||||
|
|
||||||
private val Log = KotlinLogging.logger { }
|
|
||||||
|
|
||||||
|
|
||||||
/**
 * Koin-aware loader for the sample document displayed by the viewer.
 */
class DocumentViewer : KoinComponent {
    /**
     * Loads document `00001.jpg` from the corpus at `assets/xs-reg`, runs the injected
     * [MsOcr] processor on it and returns the document.
     */
    suspend fun loadTestDocument(): Document {
        // Resolved lazily from Koin on first use.
        val ocrExtractor: MsOcr by inject()
        return CorpusFactory()
            .load("assets/xs-reg")
            .document("00001.jpg")
            .also { loaded -> loaded.process(ocrExtractor) }
    }
}
|
|
||||||
|
|
||||||
/**
 * Entry point of the document viewer.
 *
 * Koin is started and the test document is loaded exactly once, before the Aurora
 * application is entered. Both used to run inside the composable application body, where a
 * recomposition would have executed them again (a second `startKoin` call fails, and the
 * blocking load would be repeated).
 */
fun main() {
    startKoin {
        modules(
            coreApiModule,
            textProcessingModule,
            tuiModule,
            assetManagerModule,
            httpClientModule,
        )
    }

    // Blocking is acceptable here: no UI exists yet.
    val document = runBlocking { DocumentViewer().loadTestDocument() }

    auroraApplication {
        val state = rememberWindowState(
            placement = WindowPlacement.Floating,
            position = WindowPosition.Aligned(Alignment.Center),
            size = DpSize(1000.dp, 800.dp),
        )
        AuroraWindow(
            skin = marinerSkin(),
            title = "Document Viewer",
            state = state,
            windowTitlePaneConfiguration = AuroraWindowTitlePaneConfigurations.AuroraPlain(),
            onCloseRequest = ::exitApplication,
        ) {
            viewImage(document)
        }
    }
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
 * Root content of the viewer window: a text line with the current zoom scale and offset,
 * and below it the zoomable sample image with [canvas] layered on top.
 *
 * The first OCR page of [document] is fetched once with a blocking call inside `remember`
 * and handed to [canvas].
 */
@Composable
fun viewImage(document: Document) {
    val firstPage = remember { runBlocking { document.retrieveOcrPages().first() } }
    Column(modifier = Modifier.fillMaxSize().auroraBackground()) {
        val zoomState = rememberZoomState(logger = ZoomLogger("zoom", level = ZoomLogger.INFO))
        val transform = zoomState.zoomable.transform
        Text("${transform.scale} ${transform.offset}")
        Box(modifier = Modifier.fillMaxSize()) {
            ZoomedImage(
                state = zoomState,
                load = { loadImageBitmap(File("assets/xs-reg/00001.jpg")) },
                painterFor = { bitmap -> remember { BitmapPainter(bitmap) } },
                contentDescription = "Sample",
                modifier = Modifier.fillMaxSize(),
            )
            canvas(zoomState.zoomable, firstPage)
            // shapes(zoomState.zoomable)
        }
    }
}
|
|
||||||
|
|
||||||
/**
 * Loads an image asynchronously and shows it in a [ZoomImage] once it is available.
 *
 * @param state zoom state shared with the caller.
 * @param load suspending loader; it is invoked on [Dispatchers.IO].
 * @param painterFor turns the loaded value into a [Painter].
 * @param contentDescription passed through to [ZoomImage].
 * @param modifier passed through to [ZoomImage].
 * @param contentScale passed through to [ZoomImage]; defaults to [ContentScale.Fit].
 *
 * Nothing is displayed while the image is still loading or when [load] failed with an
 * [IOException].
 */
@Composable
fun <T> ZoomedImage(
    state: ZoomState,
    load: suspend () -> T,
    painterFor: @Composable (T) -> Painter,
    contentDescription: String,
    modifier: Modifier = Modifier,
    contentScale: ContentScale = ContentScale.Fit,
) {
    val loadedImage = produceState<T?>(initialValue = null) {
        value = withContext(Dispatchers.IO) {
            try {
                load()
            } catch (failure: IOException) {
                // Only printed for now; this could be logged or replaced by an error placeholder.
                failure.printStackTrace()
                null
            }
        }
    }

    val current = loadedImage.value
    if (current != null) {
        val redScrollBar = remember {
            ScrollBarSpec(
                color = Color.Red,
                size = 6.dp,
                margin = 12.dp,
            )
        }
        ZoomImage(
            painter = painterFor(current),
            contentDescription = contentDescription,
            contentScale = contentScale,
            modifier = modifier,
            scrollBar = redScrollBar,
            state = state,
        )
    }
}
|
|
||||||
/**
 * Opens [file] as a buffered stream, passes it to the stream-based `loadImageBitmap`
 * and closes the stream afterwards.
 */
fun loadImageBitmap(file: File): ImageBitmap =
    file.inputStream().buffered().use { stream -> loadImageBitmap(stream) }
|
|
||||||
|
|
||||||
/**
 * Converts document coordinates into canvas coordinates by proportional scaling.
 *
 * A document coordinate is first expressed as a fraction of the document extent and then
 * multiplied by the canvas extent. The previous implementation divided the document extent
 * by the coordinate instead, which inverted the ratio and produced `Infinity` for 0.
 *
 * @property docWidth width of the document; expected to be non-zero.
 * @property docHeight height of the document; expected to be non-zero.
 * @property canvasWidth width of the target canvas.
 * @property canvasHeight height of the target canvas.
 */
data class PointConverter(
    val docWidth: Int,
    val docHeight: Int,
    val canvasWidth: Float,
    val canvasHeight: Float,
) {
    /** Returns the canvas x position for document x coordinate [x]. */
    fun convertX(x: Int): Float = canvasWidth * (x.toFloat() / docWidth)

    /** Returns the canvas y position for document y coordinate [y]. */
    fun convertY(y: Int): Float = canvasHeight * (y.toFloat() / docHeight)
}
|
|
||||||
|
|
||||||
/**
 * Draws a red square that follows the transform of [zoomableState].
 *
 * The square is anchored at the fixed point (288, 697), scaled by the current scale factors
 * and shifted by the current offset; its edge length is `100.dp` times the x scale.
 */
@OptIn(ExperimentalFoundationApi::class)
@Composable
fun shapes(zoomableState: ZoomableState) {
    Box(modifier = Modifier.fillMaxSize()) {
        val scaleX = zoomableState.transform.scaleX
        val scaleY = zoomableState.transform.scaleY
        val marker = Modifier
            .offset {
                // The offset is read inside the lambda, exactly as before.
                val origin = zoomableState.transform.offset
                IntOffset(
                    (origin.x + (288 * scaleX)).toInt(),
                    (origin.y + (697 * scaleY)).toInt(),
                )
            }
            .clip(RectangleShape)
            .size(100.dp * scaleX)
            .background(Color.Red)
        Box(modifier = marker)
    }
}
|
|
||||||
/**
 * Draws a blue outline around every word of the OCR page [first], positioned and scaled
 * with the current transform of [zoomableState].
 *
 * Each word rectangle is multiplied by the transform's scale factors and shifted by its
 * offset. The `PointConverter` that used to be allocated on every draw pass was never
 * used and has been removed; the transform is now read once per draw pass instead of
 * several times per word.
 */
@OptIn(ExperimentalComposeUiApi::class)
@Composable
fun canvas(zoomableState: ZoomableState, first: OcrPage) {
    Canvas(modifier = Modifier.fillMaxSize()) {
        val transform = zoomableState.transform
        first.words.forEach { word ->
            val rect = word.rectangle
            drawRect(
                color = Color.Blue,
                topLeft = transform.offset + Offset(
                    rect.x.toFloat() * transform.scaleX,
                    rect.y.toFloat() * transform.scaleY,
                ),
                size = Size(
                    rect.width.toFloat() * transform.scaleX,
                    rect.height.toFloat() * transform.scaleY,
                ),
                style = Stroke(width = 5f),
            )
        }
    }
}
|
|
||||||
|
|
@ -1,7 +1,3 @@
|
||||||
project(":libraries").subprojects {
|
project(":libraries").subprojects {
|
||||||
apply(plugin = "docthor.kotlin-library-conventions")
|
apply(plugin = "docthor.kotlin-library-conventions")
|
||||||
}
|
|
||||||
|
|
||||||
project(":apps").subprojects {
|
|
||||||
apply(plugin = "docthor.kotlin-application-conventions")
|
|
||||||
}
|
}
|
||||||
|
|
@ -1,4 +1,3 @@
|
||||||
import gradle.kotlin.dsl.accessors._d9dcfd1a467b0b6fe90c5571a57aa558.api
|
|
||||||
import org.gradle.api.plugins.jvm.JvmTestSuite
|
import org.gradle.api.plugins.jvm.JvmTestSuite
|
||||||
import org.jetbrains.kotlin.gradle.dsl.JvmTarget
|
import org.jetbrains.kotlin.gradle.dsl.JvmTarget
|
||||||
|
|
||||||
|
|
@ -18,7 +17,6 @@ dependencies {
|
||||||
implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.6.2")
|
implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.6.2")
|
||||||
|
|
||||||
|
|
||||||
api("io.github.oshai:kotlin-logging-jvm:5.1.0")
|
|
||||||
testImplementation("io.insert-koin:koin-test:$koin_version")
|
testImplementation("io.insert-koin:koin-test:$koin_version")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,19 +0,0 @@
|
||||||
[versions]
|
|
||||||
kotlin = "1.9.21"
|
|
||||||
coroutines = "1.7.3"
|
|
||||||
compose = "1.5.11"
|
|
||||||
dokka = "1.9.10"
|
|
||||||
batik = "1.17"
|
|
||||||
|
|
||||||
versionchecker = "0.50.0"
|
|
||||||
mavenpublish = "0.25.3"
|
|
||||||
|
|
||||||
[libraries]
|
|
||||||
compose-desktop = { module = "org.jetbrains.compose:compose-gradle-plugin", version.ref = "compose" }
|
|
||||||
kotlin-gradlePlugin = { module = "org.jetbrains.kotlin:kotlin-gradle-plugin", version.ref = "kotlin" }
|
|
||||||
kotlin-coroutines = { module = "org.jetbrains.kotlinx:kotlinx-coroutines-core", version.ref = "coroutines" }
|
|
||||||
dokka-gradlePlugin = { module = "org.jetbrains.dokka:dokka-gradle-plugin", version.ref = "dokka"}
|
|
||||||
batik = { module = "org.apache.xmlgraphics:batik-all", version.ref = "batik" }
|
|
||||||
|
|
||||||
versionchecker-gradlePlugin = { module = "com.github.ben-manes:gradle-versions-plugin", version.ref = "versionchecker" }
|
|
||||||
mavenpublish-gradlePlugin = { module = "com.vanniktech:gradle-maven-publish-plugin", version.ref = "mavenpublish" }
|
|
||||||
|
|
@ -1,5 +0,0 @@
|
||||||
dependencies {
|
|
||||||
api(project(":libraries:core-api"))
|
|
||||||
// used for contentType
|
|
||||||
api("io.ktor:ktor-http-jvm:2.3.7")
|
|
||||||
}
|
|
||||||
|
|
@ -1,15 +0,0 @@
|
||||||
package de.itkl.assetmanager
|
|
||||||
|
|
||||||
import de.itkl.assetmanager.implementation.AssetsFileProcessorBackend
|
|
||||||
import de.itkl.assetmanager.implementation.FilesystemAssetManager
|
|
||||||
import de.itkl.assetmanager.implementation.FilesystemProjectManager
|
|
||||||
import de.itkl.assetmanager.interfaces.AssetManager
|
|
||||||
import de.itkl.assetmanager.interfaces.ProjectManager
|
|
||||||
import de.itkl.core_api.interfaces.assets.FileProcessorBackend
|
|
||||||
import org.koin.dsl.module
|
|
||||||
|
|
||||||
/**
 * Koin module of the asset manager library. Registers one singleton per interface:
 * [FilesystemProjectManager], [FilesystemAssetManager] and [AssetsFileProcessorBackend].
 */
val assetManagerModule = module {
    single<FileProcessorBackend> { AssetsFileProcessorBackend() }
    single<AssetManager> { FilesystemAssetManager() }
    single<ProjectManager> { FilesystemProjectManager() }
}
|
|
||||||
|
|
@ -1,22 +0,0 @@
|
||||||
package de.itkl.assetmanager.implementation
|
|
||||||
|
|
||||||
import de.itkl.core_api.interfaces.FileProcessor2
|
|
||||||
import de.itkl.core_api.interfaces.Resource
|
|
||||||
import de.itkl.core_api.interfaces.assets.Assets
|
|
||||||
import de.itkl.core_api.interfaces.assets.FileProcessorBackend
|
|
||||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
|
||||||
import org.koin.core.component.KoinComponent
|
|
||||||
|
|
||||||
private val Log = KotlinLogging.logger { }
|
|
||||||
/**
 * [FileProcessorBackend] that stores processor output in the given [Assets] and skips the
 * processor when an asset with the processor's file name already exists.
 */
class AssetsFileProcessorBackend : FileProcessorBackend, KoinComponent {
    /**
     * Runs [fileProcessor] on [resource] and stores the result in [assets], unless
     * [assets] already contains an entry named like the processor's output file.
     */
    override suspend fun process(resource: Resource, assets: Assets, fileProcessor: FileProcessor2) {
        Log.debug { "Call processor '${fileProcessor.filename}' on $resource" }

        val alreadyProcessed = assets.exists(fileProcessor.filename)
        if (alreadyProcessed) {
            Log.info { "${fileProcessor.filename} already exists on ${resource}. Skipping" }
            return
        }

        Log.info { "${fileProcessor.filename} does not yet exists for $resource" }
        assets.store(fileProcessor.process(resource))
    }
}
|
|
||||||
|
|
@ -1,84 +0,0 @@
|
||||||
package de.itkl.assetmanager.implementation
|
|
||||||
|
|
||||||
import de.itkl.assetmanager.interfaces.AssetManager
|
|
||||||
import de.itkl.core_api.interfaces.assets.Assets
|
|
||||||
import de.itkl.core_api.interfaces.Resource
|
|
||||||
import de.itkl.core_api.interfaces.ResourceFactory
|
|
||||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
|
||||||
import kotlinx.coroutines.Dispatchers
|
|
||||||
import kotlinx.coroutines.flow.FlowCollector
|
|
||||||
import kotlinx.coroutines.flow.emitAll
|
|
||||||
import kotlinx.coroutines.flow.map
|
|
||||||
import kotlinx.coroutines.stream.consumeAsFlow
|
|
||||||
import kotlinx.coroutines.withContext
|
|
||||||
import org.koin.core.component.KoinComponent
|
|
||||||
import org.koin.core.component.inject
|
|
||||||
import java.nio.file.Files
|
|
||||||
import java.nio.file.Path
|
|
||||||
import java.nio.file.Paths
|
|
||||||
import kotlin.io.path.deleteExisting
|
|
||||||
import kotlin.io.path.exists
|
|
||||||
import kotlin.io.path.outputStream
|
|
||||||
|
|
||||||
private val Log = KotlinLogging.logger { }
|
|
||||||
/**
 * [AssetManager] that keeps the assets of a document in a directory next to the document,
 * named `<document file name>.assets.d`.
 */
class FilesystemAssetManager : AssetManager {
    /**
     * Creates the asset directory for [name] if necessary (including missing parent
     * directories) and returns a [FilesystemAssets] rooted at it.
     */
    override suspend fun assets(name: String): Assets {
        val assetsDir = createAssetsPath(name)
        withContext(Dispatchers.IO) {
            Files.createDirectories(assetsDir)
        }
        return FilesystemAssets(assetsDir)
    }

    /**
     * Deletes the asset directory of [name] with [Files.delete].
     *
     * NOTE(review): [Files.delete] refuses to remove a non-empty directory, so this only
     * succeeds once the directory has been emptied — confirm that this is intended.
     */
    override suspend fun delete(name: String) {
        val assetsDir = createAssetsPath(name)
        withContext(Dispatchers.IO) {
            Files.delete(assetsDir)
        }
    }

    /**
     * Returns the absolute path of the asset directory belonging to document path [name].
     *
     * The directory is resolved as a sibling of the document. For absolute document paths
     * the result is the same as before; relative paths no longer get their directory prefix
     * duplicated, and bare file names (no parent) no longer fail on a null parent.
     */
    private fun createAssetsPath(name: String): Path {
        val documentPath = Paths.get(name)
        val fileName = requireNotNull(documentPath.fileName) {
            "Cannot derive an asset directory from '$name'"
        }
        return documentPath.resolveSibling("$fileName.assets.d").toAbsolutePath()
    }
}
|
|
||||||
|
|
||||||
/**
 * [Assets] backed by the files directly inside [baseDir]. Collecting it as a flow emits one
 * [Resource] per directory entry.
 */
class FilesystemAssets(private val baseDir: Path) : Assets, KoinComponent {

    private val resourceFactory by inject<ResourceFactory>()

    /** Copies the content of [resource] into `baseDir/<resource.filename>`. */
    override suspend fun store(resource: Resource) {
        val target = baseDir.resolve(resource.filename)
        resource.read().use { input ->
            target.outputStream().use { sink ->
                // Only the copy itself is moved to the IO dispatcher.
                withContext(Dispatchers.IO) { input.copyTo(sink) }
            }
        }
    }

    /** Returns the file named [name] inside the base directory, or `null` if it does not exist. */
    override suspend fun retrieve(name: String): Resource? {
        val target = baseDir.resolve(name)
        return if (target.exists()) {
            Log.debug { "Loading file at $target" }
            resourceFactory.file(target)
        } else {
            null
        }
    }

    /** Deletes the file named [name]; `deleteExisting` fails if it is missing. */
    override suspend fun delete(name: String) {
        val target = baseDir.resolve(name)
        withContext(Dispatchers.IO) { target.deleteExisting() }
    }

    /** Lists the base directory and emits every entry as a [Resource] to [collector]. */
    override suspend fun collect(collector: FlowCollector<Resource>) {
        val entries = withContext(Dispatchers.IO) { Files.list(baseDir) }
        val resources = entries.consumeAsFlow().map { entry -> resourceFactory.file(entry) }
        collector.emitAll(resources)
    }
}
|
|
||||||
|
|
@ -1,64 +0,0 @@
|
||||||
package de.itkl.assetmanager.implementation
|
|
||||||
|
|
||||||
import de.itkl.assetmanager.interfaces.AssetManager
|
|
||||||
import de.itkl.core_api.interfaces.assets.Assets
|
|
||||||
import de.itkl.assetmanager.interfaces.Project
|
|
||||||
import de.itkl.assetmanager.interfaces.ProjectManager
|
|
||||||
import de.itkl.core_api.interfaces.Resource
|
|
||||||
import de.itkl.core_api.interfaces.ResourceFactory
|
|
||||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
|
||||||
import kotlinx.coroutines.Dispatchers
|
|
||||||
import kotlinx.coroutines.withContext
|
|
||||||
import org.koin.core.component.KoinComponent
|
|
||||||
import org.koin.core.component.inject
|
|
||||||
import java.nio.file.Paths
|
|
||||||
import kotlin.io.path.isDirectory
|
|
||||||
import kotlin.io.path.isRegularFile
|
|
||||||
import kotlin.io.path.listDirectoryEntries
|
|
||||||
|
|
||||||
private val Log = KotlinLogging.logger { }
|
|
||||||
|
|
||||||
/**
 * [ProjectManager] that treats a directory as a project and its regular files as documents.
 */
class FilesystemProjectManager : ProjectManager {
    /**
     * Loads the directory at [name] as a [FilesystemProject].
     *
     * The document names are the absolute paths of the regular files directly inside the
     * directory; the display name is the directory's own file name.
     *
     * @throws IllegalStateException if [name] is not a directory.
     */
    override suspend fun load(name: String): Project {
        val root = Paths.get(name)
        check(root.isDirectory()) { "Currently only directories as corpora are supported" }

        val documentNames = withContext(Dispatchers.IO) {
            root.listDirectoryEntries()
                .filter { entry -> entry.isRegularFile() }
                .map { entry -> entry.toAbsolutePath().toString() }
        }

        return FilesystemProject(
            name = name,
            displayName = root.fileName.toString(),
            documentNames = documentNames,
        )
    }
}
|
|
||||||
|
|
||||||
/**
 * [Project] rooted at the filesystem path given by [name].
 *
 * Assets are delegated to the injected [AssetManager]; resources are created by the
 * injected [ResourceFactory].
 */
class FilesystemProject(
    override val name: String,
    override val displayName: String,
    override val documentNames: List<String>,
) : Project, KoinComponent {

    private val basePath = Paths.get(name)

    private val assetManager: AssetManager by inject()
    private val resourceFactory: ResourceFactory by inject()

    /** Resolves [name] against the project path and returns it as an absolute path string. */
    override fun resolveName(name: String): String =
        basePath.resolve(name).toAbsolutePath().toString()

    /** Returns the assets that the [AssetManager] provides for [documentName]. */
    override suspend fun assets(documentName: String): Assets =
        assetManager.assets(documentName)

    /** Creates a file resource for [name] resolved against the project path. */
    override suspend fun resource(name: String): Resource? {
        Log.debug { "Project: opening resource of name $name" }
        return resourceFactory.file(basePath.resolve(name))
    }
}
|
|
||||||
|
|
@ -1,11 +0,0 @@
|
||||||
package de.itkl.assetmanager.interfaces
|
|
||||||
|
|
||||||
import de.itkl.core_api.interfaces.assets.Assets
|
|
||||||
|
|
||||||
/**
 * Manages the assets of a single document.
 */
interface AssetManager {
    /** Returns the [Assets] associated with [name]. */
    suspend fun assets(name: String): Assets

    /** Deletes what is stored for [name]. */
    suspend fun delete(name: String)
}
|
|
||||||
|
|
@ -1,18 +0,0 @@
|
||||||
package de.itkl.assetmanager.interfaces
|
|
||||||
|
|
||||||
import de.itkl.core_api.interfaces.Resource
|
|
||||||
import de.itkl.core_api.interfaces.assets.Assets
|
|
||||||
|
|
||||||
/**
 * A set of documents, each of which can hold its own assets.
 */
interface Project {
    /** Identifying name of the project. */
    val name: String

    /** Name intended for display. */
    val displayName: String

    /** Names of the documents that belong to the project. */
    val documentNames: List<String>

    /** Resolves [name] in the context of this project. */
    fun resolveName(name: String): String

    /** Returns the [Assets] of the document called [documentName]. */
    suspend fun assets(documentName: String): Assets

    /** Returns the resource called [name]; the return type allows `null`. */
    suspend fun resource(name: String): Resource?
}
|
|
||||||
|
|
@ -1,5 +0,0 @@
|
||||||
package de.itkl.assetmanager.interfaces
|
|
||||||
|
|
||||||
/**
 * Loads [Project]s by name.
 */
interface ProjectManager {
    /** Loads and returns the project identified by [name]. */
    suspend fun load(name: String): Project
}
|
|
||||||
|
|
@ -1,7 +1,3 @@
|
||||||
plugins {
|
|
||||||
kotlin("plugin.serialization") version embeddedKotlinVersion
|
|
||||||
}
|
|
||||||
|
|
||||||
dependencies {
|
dependencies {
|
||||||
// used for contentType
|
// used for contentType
|
||||||
api("io.ktor:ktor-http-jvm:2.3.7")
|
api("io.ktor:ktor-http-jvm:2.3.7")
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,11 @@
|
||||||
package de.itkl.core_api
|
package de.itkl.core_api
|
||||||
|
|
||||||
|
import de.itkl.core_api.interfaces.NoopResourceReadDecorator
|
||||||
import de.itkl.core_api.interfaces.ResourceFactory
|
import de.itkl.core_api.interfaces.ResourceFactory
|
||||||
|
import de.itkl.core_api.interfaces.ResourceReadDecorator
|
||||||
import org.koin.dsl.module
|
import org.koin.dsl.module
|
||||||
|
|
||||||
val coreApiModule = module {
|
val coreApiModule = module {
|
||||||
single<ResourceFactory> { ResourceFactory()}
|
single<ResourceFactory> { ResourceFactory()}
|
||||||
|
single<ResourceReadDecorator> { NoopResourceReadDecorator() }
|
||||||
}
|
}
|
||||||
|
|
@ -1,34 +0,0 @@
|
||||||
package de.itkl.core_api.implementation
|
|
||||||
|
|
||||||
import de.itkl.core_api.interfaces.Resource
|
|
||||||
import io.ktor.http.*
|
|
||||||
import kotlinx.serialization.*
|
|
||||||
import kotlinx.serialization.json.Json
|
|
||||||
import kotlinx.serialization.json.encodeToStream
|
|
||||||
import java.io.File
|
|
||||||
import java.io.InputStream
|
|
||||||
import java.io.UnsupportedEncodingException
|
|
||||||
import java.nio.file.Path
|
|
||||||
|
|
||||||
/**
 * In-memory [Resource] whose content is produced by serializing [obj] with [serializer].
 *
 * It has no backing file, so [length], [file] and [path] are always `null`. Only
 * `ContentType.Application.Json` is supported.
 */
class SerializableResource<T : Any> @OptIn(ExperimentalSerializationApi::class) constructor(
    override val filename: String,
    override val contentType: ContentType,
    private val obj: T,
    private val serializer: SerializationStrategy<T>,
) : Resource {

    override val length: Long? = null
    override val file: File? = null
    override val path: Path? = null

    /** Serializes the object and returns the resulting text as a stream. */
    override fun read(): InputStream = serialize().byteInputStream()

    /**
     * Encodes [obj] according to [contentType].
     *
     * @throws UnsupportedEncodingException for every content type other than JSON.
     */
    private fun serialize(): String {
        if (contentType != ContentType.Application.Json) {
            throw UnsupportedEncodingException("Sorry but $contentType is not supported for Resources")
        }
        return Json.encodeToString(serializer, obj)
    }
}
|
|
||||||
|
|
@ -2,14 +2,8 @@ package de.itkl.core_api.interfaces
|
||||||
|
|
||||||
import java.io.File
|
import java.io.File
|
||||||
import java.nio.file.Path
|
import java.nio.file.Path
|
||||||
import java.util.function.Consumer
|
|
||||||
|
|
||||||
interface FileProcessor {
|
interface FileProcessor {
|
||||||
fun willProduce(path: Path): Path
|
fun willProduce(path: Path): Path
|
||||||
suspend fun process(resource: Resource): File
|
suspend fun process(resource: Resource): File
|
||||||
}
|
|
||||||
|
|
||||||
interface FileProcessor2 {
|
|
||||||
val filename: String
|
|
||||||
suspend fun process(resource: Resource): Resource
|
|
||||||
}
|
}
|
||||||
|
|
@ -1,15 +1,11 @@
|
||||||
package de.itkl.core_api.interfaces
|
package de.itkl.core_api.interfaces
|
||||||
|
|
||||||
import io.ktor.http.*
|
import io.ktor.http.*
|
||||||
import kotlinx.serialization.DeserializationStrategy
|
|
||||||
import kotlinx.serialization.KSerializer
|
|
||||||
import kotlinx.serialization.json.Json
|
|
||||||
import org.koin.core.component.KoinComponent
|
import org.koin.core.component.KoinComponent
|
||||||
import org.koin.core.component.get
|
import org.koin.core.component.get
|
||||||
import java.io.File
|
import java.io.File
|
||||||
import java.io.InputStream
|
import java.io.InputStream
|
||||||
import java.nio.file.Path
|
import java.nio.file.Path
|
||||||
import kotlin.reflect.KClass
|
|
||||||
|
|
||||||
interface Resource {
|
interface Resource {
|
||||||
val filename: String
|
val filename: String
|
||||||
|
|
@ -19,16 +15,8 @@ interface Resource {
|
||||||
val file: File?
|
val file: File?
|
||||||
val path: Path?
|
val path: Path?
|
||||||
fun read(): InputStream
|
fun read(): InputStream
|
||||||
|
|
||||||
fun <T: Any> json(deserializer: DeserializationStrategy<T>): T {
|
|
||||||
val string = String(read().readAllBytes())
|
|
||||||
return Json.decodeFromString(deserializer, string)
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Automatically adds koin injectable decorators to reading/writing
|
* Automatically adds koin injectable decorators to reading/writing
|
||||||
* operations
|
* operations
|
||||||
|
|
@ -36,10 +24,11 @@ interface Resource {
|
||||||
abstract class AbstractResource : Resource, KoinComponent {
|
abstract class AbstractResource : Resource, KoinComponent {
|
||||||
abstract fun doRead(): InputStream
|
abstract fun doRead(): InputStream
|
||||||
final override fun read(): InputStream {
|
final override fun read(): InputStream {
|
||||||
return doRead()
|
return length?.let { length ->
|
||||||
}
|
get<ResourceReadDecorator>().decorate(
|
||||||
|
length = length,
|
||||||
override fun toString(): String {
|
doRead()
|
||||||
return filename
|
)
|
||||||
|
} ?: doRead()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -2,31 +2,13 @@ package de.itkl.core_api.interfaces
|
||||||
|
|
||||||
import de.itkl.core_api.implementation.FileResource
|
import de.itkl.core_api.implementation.FileResource
|
||||||
import de.itkl.core_api.implementation.ProgressResource
|
import de.itkl.core_api.implementation.ProgressResource
|
||||||
import de.itkl.core_api.implementation.SerializableResource
|
|
||||||
import io.ktor.http.*
|
|
||||||
import kotlinx.serialization.SerializationStrategy
|
|
||||||
import org.koin.core.component.KoinComponent
|
import org.koin.core.component.KoinComponent
|
||||||
import org.koin.core.component.inject
|
import org.koin.core.component.inject
|
||||||
import java.io.File
|
import java.io.File
|
||||||
import java.nio.file.Path
|
|
||||||
import java.nio.file.Paths
|
|
||||||
|
|
||||||
class ResourceFactory : KoinComponent {
|
class ResourceFactory : KoinComponent {
|
||||||
|
|
||||||
private val progressBarFactory by inject<ProgressBarFactory>()
|
private val progressBarFactory by inject<ProgressBarFactory>()
|
||||||
fun <T : Any> json(name: String, obj: T, serializationStrategy: SerializationStrategy<T>): Resource {
|
|
||||||
return SerializableResource<T>(
|
|
||||||
filename = name,
|
|
||||||
contentType = ContentType.Application.Json,
|
|
||||||
obj = obj,
|
|
||||||
serializer = serializationStrategy)
|
|
||||||
}
|
|
||||||
fun file(path: String): Resource {
|
|
||||||
return file(Paths.get(path))
|
|
||||||
}
|
|
||||||
fun file(path: Path): Resource {
|
|
||||||
return file(path.toFile())
|
|
||||||
}
|
|
||||||
fun file(file: File): Resource {
|
fun file(file: File): Resource {
|
||||||
val resource = FileResource(file)
|
val resource = FileResource(file)
|
||||||
return ProgressResource(resource, progressBarFactory)
|
return ProgressResource(resource, progressBarFactory)
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,15 @@
|
||||||
package de.itkl.core_api.interfaces
|
package de.itkl.core_api.interfaces
|
||||||
|
|
||||||
import java.io.InputStream
|
import java.io.InputStream
|
||||||
|
|
||||||
|
interface ResourceReadDecorator {
|
||||||
|
fun decorate(
|
||||||
|
length: Long,
|
||||||
|
inputStream: InputStream): InputStream
|
||||||
|
}
|
||||||
|
|
||||||
|
class NoopResourceReadDecorator : ResourceReadDecorator {
|
||||||
|
override fun decorate(length: Long, inputStream: InputStream): InputStream {
|
||||||
|
return inputStream
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,15 +0,0 @@
|
||||||
package de.itkl.core_api.interfaces.assets
|
|
||||||
|
|
||||||
import de.itkl.core_api.interfaces.Resource
|
|
||||||
import kotlinx.coroutines.flow.Flow
|
|
||||||
import java.util.function.Consumer
|
|
||||||
|
|
||||||
interface Assets : Flow<Resource> {
|
|
||||||
suspend fun store(resource: Resource)
|
|
||||||
suspend fun retrieve(name: String): Resource?
|
|
||||||
suspend fun delete(name: String)
|
|
||||||
|
|
||||||
suspend fun exists(name: String): Boolean {
|
|
||||||
return retrieve(name) != null
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,16 +0,0 @@
|
||||||
package de.itkl.core_api.interfaces.assets
|
|
||||||
|
|
||||||
import de.itkl.core_api.interfaces.FileProcessor
|
|
||||||
import de.itkl.core_api.interfaces.FileProcessor2
|
|
||||||
import de.itkl.core_api.interfaces.Resource
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Executes a [FileProcessor2] on a [Resource]. It decides if and when
|
|
||||||
* the [FileProcessor2.process] should be called and what should happen with the result
|
|
||||||
*/
|
|
||||||
interface FileProcessorBackend {
|
|
||||||
suspend fun process(
|
|
||||||
resource: Resource,
|
|
||||||
assets: Assets,
|
|
||||||
fileProcessor: FileProcessor2)
|
|
||||||
}
|
|
||||||
|
|
@ -1,8 +0,0 @@
|
||||||
package de.itkl.core_api.interfaces.data
|
|
||||||
|
|
||||||
import de.itkl.core_api.interfaces.FileProcessor
|
|
||||||
import de.itkl.core_api.interfaces.FileProcessor2
|
|
||||||
|
|
||||||
interface Processable {
|
|
||||||
suspend fun process(fileProcessor: FileProcessor2)
|
|
||||||
}
|
|
||||||
|
|
@ -1,29 +1,18 @@
|
||||||
package de.itkl.httpClient.clients
|
package de.itkl.httpClient.clients
|
||||||
|
|
||||||
import de.itkl.core_api.dtos.MsOcrResponse
|
|
||||||
import de.itkl.core_api.interfaces.FileProcessor
|
|
||||||
import de.itkl.core_api.interfaces.FileProcessor2
|
|
||||||
import de.itkl.core_api.interfaces.Resource
|
import de.itkl.core_api.interfaces.Resource
|
||||||
import de.itkl.core_api.interfaces.ResourceFactory
|
|
||||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||||
import io.ktor.client.*
|
import io.ktor.client.*
|
||||||
import io.ktor.client.call.*
|
import io.ktor.client.call.*
|
||||||
import io.ktor.client.request.*
|
import io.ktor.client.request.*
|
||||||
import io.ktor.client.statement.*
|
import io.ktor.client.statement.*
|
||||||
import io.ktor.client.utils.EmptyContent.contentType
|
|
||||||
import io.ktor.http.*
|
import io.ktor.http.*
|
||||||
import kotlinx.serialization.json.Json
|
|
||||||
import org.koin.core.component.KoinComponent
|
import org.koin.core.component.KoinComponent
|
||||||
import org.koin.core.component.inject
|
import org.koin.core.component.inject
|
||||||
import java.io.File
|
|
||||||
import java.nio.file.Path
|
|
||||||
import kotlin.io.path.nameWithoutExtension
|
|
||||||
import kotlin.io.path.writeText
|
|
||||||
|
|
||||||
private val Log = KotlinLogging.logger { }
|
private val Log = KotlinLogging.logger { }
|
||||||
class MsOcr: KoinComponent, FileProcessor2 {
|
class MsOcr: KoinComponent {
|
||||||
private val httpClient: HttpClient by inject()
|
private val httpClient: HttpClient by inject()
|
||||||
private val resourceFactory: ResourceFactory by inject()
|
|
||||||
|
|
||||||
suspend fun ocr(resource: Resource): MsOcrResponse {
|
suspend fun ocr(resource: Resource): MsOcrResponse {
|
||||||
val response = httpClient.post {
|
val response = httpClient.post {
|
||||||
|
|
@ -35,13 +24,7 @@ class MsOcr: KoinComponent, FileProcessor2 {
|
||||||
contentType(resource.contentType)
|
contentType(resource.contentType)
|
||||||
setBody(resource.read())
|
setBody(resource.read())
|
||||||
}
|
}
|
||||||
|
println("got response: ${response.status} in ${response.responseTime}")
|
||||||
return response.body()
|
return response.body()
|
||||||
}
|
}
|
||||||
|
|
||||||
override val filename = "ms-ocr.json"
|
|
||||||
|
|
||||||
override suspend fun process(resource: Resource): Resource {
|
|
||||||
val result = ocr(resource)
|
|
||||||
return resourceFactory.json(filename, result, MsOcrResponse.serializer())
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
@ -1,7 +1,8 @@
|
||||||
package de.itkl.core_api.dtos
|
package de.itkl.httpClient.clients
|
||||||
|
|
||||||
|
|
||||||
import kotlinx.datetime.Instant
|
import kotlinx.datetime.Instant
|
||||||
|
import kotlinx.datetime.LocalDateTime
|
||||||
import kotlinx.serialization.SerialName
|
import kotlinx.serialization.SerialName
|
||||||
import kotlinx.serialization.Serializable
|
import kotlinx.serialization.Serializable
|
||||||
|
|
||||||
|
|
@ -0,0 +1,3 @@
|
||||||
|
dependencies {
|
||||||
|
api(project(":libraries:core-api"))
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,19 @@
|
||||||
|
package de.itkl.io.implementation
|
||||||
|
|
||||||
|
import de.itkl.core_api.interfaces.Resource
|
||||||
|
import io.ktor.http.*
|
||||||
|
import java.io.File
|
||||||
|
import java.io.InputStream
|
||||||
|
|
||||||
|
class FileSystemResource(private val file: File) : Resource() {
|
||||||
|
override val filename: String
|
||||||
|
get() = file.name
|
||||||
|
override val contentType: ContentType
|
||||||
|
get() = ContentType.fromFilePath(file.path).first()
|
||||||
|
override val length: Long
|
||||||
|
get() = file.length()
|
||||||
|
|
||||||
|
override fun doRead(): InputStream {
|
||||||
|
return file.inputStream()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,9 @@
|
||||||
|
package de.itkl.io
|
||||||
|
|
||||||
|
import de.itkl.core_api.interfaces.NoopResourceReadDecorator
|
||||||
|
import de.itkl.core_api.interfaces.ResourceReadDecorator
|
||||||
|
import org.koin.dsl.module
|
||||||
|
|
||||||
|
val ioModule = module {
|
||||||
|
single<ResourceReadDecorator> { NoopResourceReadDecorator() }
|
||||||
|
}
|
||||||
|
|
@ -1,9 +1,6 @@
|
||||||
dependencies {
|
dependencies {
|
||||||
api(project(":libraries:core-api"))
|
api(project(":libraries:core-api"))
|
||||||
api("org.apache.lucene:lucene-analysis-common:9.9.0")
|
api("org.apache.lucene:lucene-analysis-common:9.9.0")
|
||||||
api("io.github.piruin:geok:1.2.2")
|
|
||||||
api(project(":libraries:assetmanager"))
|
|
||||||
api("com.soywiz.korge:korge-foundation:5.1.0")
|
|
||||||
implementation("com.github.doyaaaaaken:kotlin-csv-jvm:1.9.2")
|
implementation("com.github.doyaaaaaken:kotlin-csv-jvm:1.9.2")
|
||||||
implementation("com.google.guava:guava:32.1.3-jre")
|
implementation("com.google.guava:guava:32.1.3-jre")
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,37 +0,0 @@
|
||||||
package de.itkl.textprocessing
|
|
||||||
|
|
||||||
import de.itkl.assetmanager.interfaces.Project
|
|
||||||
import de.itkl.assetmanager.interfaces.ProjectManager
|
|
||||||
import de.itkl.core_api.interfaces.FileProcessor
|
|
||||||
import de.itkl.core_api.interfaces.ResourceFactory
|
|
||||||
import de.itkl.core_api.interfaces.data.Processable
|
|
||||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
|
||||||
import org.koin.core.component.KoinComponent
|
|
||||||
import org.koin.core.component.inject
|
|
||||||
import org.koin.java.KoinJavaComponent.inject
|
|
||||||
import java.nio.file.Paths
|
|
||||||
|
|
||||||
private val Log = KotlinLogging.logger { }
|
|
||||||
|
|
||||||
class CorpusFactory : KoinComponent {
|
|
||||||
private val projectManager: ProjectManager by inject()
|
|
||||||
suspend fun load(name: String): Corpus {
|
|
||||||
Log.info { "Open corpus at ${Paths.get(name).toAbsolutePath()}" }
|
|
||||||
return Corpus(projectManager.load(name)).apply {
|
|
||||||
Log.debug { "Found documents in corpus: ${this.documentNames.joinToString("\n")}" }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
class Corpus(private val project: Project): KoinComponent {
|
|
||||||
val displayName get() = project.displayName
|
|
||||||
val documentNames get() = project.documentNames
|
|
||||||
|
|
||||||
private val resourceFactory: ResourceFactory by inject()
|
|
||||||
|
|
||||||
suspend fun document(name: String): Document {
|
|
||||||
return Document(
|
|
||||||
project.resolveName(name),
|
|
||||||
listOf(project.resource(name)!!)
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,103 +1,4 @@
|
||||||
package de.itkl.textprocessing
|
package de.itkl.textprocessing
|
||||||
|
|
||||||
import de.itkl.assetmanager.interfaces.AssetManager
|
class DocumentContainer {
|
||||||
import de.itkl.core_api.dtos.MsOcrResponse
|
|
||||||
import de.itkl.core_api.interfaces.FileProcessor
|
|
||||||
import de.itkl.core_api.interfaces.FileProcessor2
|
|
||||||
import de.itkl.core_api.interfaces.Resource
|
|
||||||
import de.itkl.core_api.interfaces.assets.Assets
|
|
||||||
import de.itkl.core_api.interfaces.assets.FileProcessorBackend
|
|
||||||
import de.itkl.core_api.interfaces.data.Processable
|
|
||||||
import korlibs.math.geom.Rectangle
|
|
||||||
import kotlinx.coroutines.flow.Flow
|
|
||||||
import kotlinx.coroutines.flow.asFlow
|
|
||||||
import kotlinx.coroutines.flow.filter
|
|
||||||
import me.piruin.geok.LatLng
|
|
||||||
import me.piruin.geok.geometry.Polygon
|
|
||||||
import org.koin.core.component.KoinComponent
|
|
||||||
import org.koin.core.component.inject
|
|
||||||
|
|
||||||
class Document(
|
|
||||||
val name: String,
|
|
||||||
val resources: List<Resource>
|
|
||||||
) : Processable, KoinComponent {
|
|
||||||
private val assetManager: AssetManager by inject()
|
|
||||||
private val fileProcessorBackend: FileProcessorBackend by inject()
|
|
||||||
suspend fun assets(): Assets {
|
|
||||||
return assetManager.assets(name)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Loads the extracted ocr pages. Note that not every pages
|
|
||||||
* needs to have ocr
|
|
||||||
*/
|
|
||||||
suspend fun retrieveOcrPages(): List<OcrPage> {
|
|
||||||
// TODO: How to identify the assets independently from their name?
|
|
||||||
val resource = checkNotNull(assets()
|
|
||||||
.retrieve("ms-ocr.json")) {
|
|
||||||
"Ocr for $name is not yet created"
|
|
||||||
}
|
|
||||||
val msOcrResponse = resource.json(MsOcrResponse.serializer())
|
|
||||||
return msOcrResponse.analyzeResult.readResults.map { toOcrPage(it) }
|
|
||||||
}
|
|
||||||
override suspend fun process(fileProcessor: FileProcessor2) {
|
|
||||||
fileProcessorBackend.process(
|
|
||||||
resources.first(),
|
|
||||||
assets(),
|
|
||||||
fileProcessor
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
private fun toOcrPage(readResult: MsOcrResponse.AnalyzeResult.ReadResult): OcrPage {
|
|
||||||
return OcrPage(
|
|
||||||
pageNumber = readResult.page,
|
|
||||||
width = readResult.width,
|
|
||||||
height = readResult.height,
|
|
||||||
words = readResult.lines.flatMap { line -> line.words.map { toOcrWord(it) } }
|
|
||||||
)
|
|
||||||
}
|
|
||||||
private fun toOcrWord(word: MsOcrResponse.AnalyzeResult.ReadResult.Line.Word): OcrPage.OcrWord {
|
|
||||||
val box = word.boundingBox
|
|
||||||
return OcrPage.OcrWord(
|
|
||||||
Rectangle(
|
|
||||||
x = box[0],
|
|
||||||
y = box[1],
|
|
||||||
width = box[2] - box[0],
|
|
||||||
height = box[7] - box[1]),
|
|
||||||
// polygon = Polygon(listOf(
|
|
||||||
// LatLng(box[0].toDouble(), box[1].toDouble()),
|
|
||||||
// LatLng(box[2].toDouble(), box[3].toDouble()),
|
|
||||||
// LatLng(box[4].toDouble(), box[5].toDouble()),
|
|
||||||
// LatLng(box[6].toDouble(), box[7].toDouble()),
|
|
||||||
// )),
|
|
||||||
text = word.text
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
class OcrPage(
|
|
||||||
val width: Int,
|
|
||||||
val height: Int,
|
|
||||||
val pageNumber: Int,
|
|
||||||
val words: List<OcrWord>,
|
|
||||||
// val regions: List<DocumentRegion> = emptyList()
|
|
||||||
) {
|
|
||||||
// inner class DocumentRegion(
|
|
||||||
// private val polygon: Polygon,
|
|
||||||
// private val type: String,
|
|
||||||
// ) {
|
|
||||||
// fun words(): Flow<OcrWord> {
|
|
||||||
// return words
|
|
||||||
// .asFlow()
|
|
||||||
// .filter { word -> word.polygon.intersectionWith(polygon) != null }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
fun addOcrWord(rectangle: Rectangle, text: String): OcrWord {
|
|
||||||
return OcrWord(rectangle, text)
|
|
||||||
}
|
|
||||||
class OcrWord(
|
|
||||||
val rectangle: Rectangle,
|
|
||||||
val text: String
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
|
|
@ -1,13 +1,11 @@
|
||||||
|
//pluginManagement {
|
||||||
|
// includeBuild("build-logic")
|
||||||
|
//}
|
||||||
|
|
||||||
plugins {
|
plugins {
|
||||||
id("org.gradle.toolchains.foojay-resolver-convention") version "0.4.0"
|
id("org.gradle.toolchains.foojay-resolver-convention") version "0.4.0"
|
||||||
}
|
}
|
||||||
|
|
||||||
rootProject.name = "docthor"
|
|
||||||
|
|
||||||
fun includeDirs(vararg paths: String) {
|
|
||||||
paths.forEach(this::includeDir)
|
|
||||||
}
|
|
||||||
|
|
||||||
fun includeDir(path: String) {
|
fun includeDir(path: String) {
|
||||||
file(path)
|
file(path)
|
||||||
.listFiles()!!
|
.listFiles()!!
|
||||||
|
|
@ -20,9 +18,8 @@ fun includeDir(path: String) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
rootProject.name = "docthor"
|
||||||
include(
|
include(
|
||||||
"app",
|
"app",
|
||||||
)
|
)
|
||||||
includeDirs(
|
includeDir("libraries")
|
||||||
"apps",
|
|
||||||
"libraries")
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue