Compare commits
14 Commits
9f3813a83a
...
5a3f4031d2
| Author | SHA1 | Date |
|---|---|---|
|
|
5a3f4031d2 | |
|
|
d23b4f472c | |
|
|
97b5444159 | |
|
|
2cab145008 | |
|
|
9ea725fc36 | |
|
|
accdfbca67 | |
|
|
7ed5a39bac | |
|
|
4ae5c3bf58 | |
|
|
949f87800a | |
|
|
ac412385bb | |
|
|
8ef054baa4 | |
|
|
a2483c85d7 | |
|
|
b4ab91e1db | |
|
|
c758d0b79d |
|
|
@ -1,4 +1,4 @@
|
||||||
.gradle
|
.gradle
|
||||||
build
|
build
|
||||||
.idea
|
.idea
|
||||||
assets
|
/assets
|
||||||
|
|
@ -7,4 +7,5 @@
|
||||||
start-page="docthor.md">
|
start-page="docthor.md">
|
||||||
|
|
||||||
<toc-element topic="docthor.md"/>
|
<toc-element topic="docthor.md"/>
|
||||||
|
<toc-element topic="Snippets.md"/>
|
||||||
</instance-profile>
|
</instance-profile>
|
||||||
|
|
@ -0,0 +1,36 @@
|
||||||
|
# Snippets
|
||||||
|
|
||||||
|
## Scale a Shape alongside ZoomImage
|
||||||
|
|
||||||
|
|
||||||
|
```kotlin
|
||||||
|
|
||||||
|
@Composable
|
||||||
|
fun shapes(zoomableState: ZoomableState) {
|
||||||
|
Box(modifier = Modifier.fillMaxSize()) {
|
||||||
|
val scaleX = zoomableState.transform.scaleX
|
||||||
|
val scaleY = zoomableState.transform.scaleY
|
||||||
|
Box(
|
||||||
|
modifier = Modifier
|
||||||
|
.offset { IntOffset(
|
||||||
|
((zoomableState.transform.offset.x + (288 * scaleX)) ).toInt(),
|
||||||
|
((zoomableState.transform.offset.y + (697 * scaleY)) ).toInt()
|
||||||
|
) }
|
||||||
|
.clip(RectangleShape)
|
||||||
|
.size(100.dp * scaleX)
|
||||||
|
.background(Color.Red)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Scale a Canvas alongside ZoomImage
|
||||||
|
|
||||||
|
```kotlin
|
||||||
|
drawRect(
|
||||||
|
Color.Blue,
|
||||||
|
topLeft = zoomableState.transform.offset + (Offset(288 * zoomableState.transform.scaleX,697 * zoomableState.transform.scaleY)),
|
||||||
|
size = Size( (793 - 288)* zoomableState.transform.scaleX, (741 - 697) * zoomableState.transform.scaleY),
|
||||||
|
style = Stroke(width = 5f)
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
@ -11,6 +11,15 @@ Asset can be found under <path>memento:/mnt/wd/export/data</path>
|
||||||
<def title="PDF Renderer for Compose">
|
<def title="PDF Renderer for Compose">
|
||||||
<a href="https://github.com/GRizzi91/bouquet">bouquet</a>
|
<a href="https://github.com/GRizzi91/bouquet">bouquet</a>
|
||||||
</def>
|
</def>
|
||||||
|
<def title="Moko Resource">
|
||||||
|
<a href="https://github.com/icerockdev/moko-resources">Resource Management for Compose</a>
|
||||||
|
</def>
|
||||||
|
<def title="Aurora">
|
||||||
|
<a href="https://github.com/kirill-grouchnikov/aurora">Building modern, elegant and fast desktop Compose applications</a>
|
||||||
|
</def>
|
||||||
|
<def title="Zoomimage">
|
||||||
|
<a href="https://github.com/panpf/zoomimage">Zooming an Image</a>
|
||||||
|
</def>
|
||||||
</deflist>
|
</deflist>
|
||||||
|
|
||||||
## Modules - Libraries
|
## Modules - Libraries
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
|
||||||
plugins {
|
plugins {
|
||||||
id("docthor.kotlin-application-conventions")
|
id("docthor.kotlin-application-conventions")
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,7 @@ class ComputeIdf : CliktCommand() {
|
||||||
.required()
|
.required()
|
||||||
|
|
||||||
override fun run() = runBlocking {
|
override fun run() = runBlocking {
|
||||||
TfIdfPipeline(force = true)
|
TfIdfPipeline(force = false)
|
||||||
.input(corpus)
|
.input(corpus)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,31 @@
|
||||||
|
// Build script of the Compose Desktop document viewer application.
plugins {
    id("org.jetbrains.compose") version "1.5.11"
}

repositories {
    google()
}

dependencies {
    // Sibling library modules the viewer is built on.
    listOf(
        "assetmanager",
        "core-api",
        "textprocessing",
        "httpClient",
        "tui",
    ).forEach { library ->
        implementation(project(":libraries:$library"))
    }

    // The Aurora artifacts are released together, so they share one version.
    val auroraVersion = "1.3.0"
    implementation("org.pushing-pixels:aurora-theming:$auroraVersion")
    implementation("org.pushing-pixels:aurora-component:$auroraVersion")
    implementation("org.pushing-pixels:aurora-window:$auroraVersion")
    implementation(compose.desktop.currentOs)

    // Both ZoomImage artifacts are kept on the same version.
    val zoomImageVersion = "1.0.0-beta11"
    implementation("io.github.panpf.zoomimage:zoomimage-compose:$zoomImageVersion")
    implementation("io.github.panpf.zoomimage:zoomimage-compose-desktop:$zoomImageVersion")
}
|
||||||
|
|
@ -0,0 +1,235 @@
|
||||||
|
package de.itkl.documentViewer
|
||||||
|
|
||||||
|
import androidx.compose.foundation.*
|
||||||
|
import androidx.compose.foundation.layout.*
|
||||||
|
import androidx.compose.material.Text
|
||||||
|
import androidx.compose.runtime.*
|
||||||
|
import androidx.compose.ui.Alignment
|
||||||
|
import androidx.compose.ui.ExperimentalComposeUiApi
|
||||||
|
import androidx.compose.ui.Modifier
|
||||||
|
import androidx.compose.ui.draw.clip
|
||||||
|
import androidx.compose.ui.geometry.Offset
|
||||||
|
import androidx.compose.ui.geometry.Size
|
||||||
|
import androidx.compose.ui.graphics.Color
|
||||||
|
import androidx.compose.ui.graphics.ImageBitmap
|
||||||
|
import androidx.compose.ui.graphics.RectangleShape
|
||||||
|
import androidx.compose.ui.graphics.drawscope.Stroke
|
||||||
|
import androidx.compose.ui.graphics.painter.BitmapPainter
|
||||||
|
import androidx.compose.ui.graphics.painter.Painter
|
||||||
|
import androidx.compose.ui.layout.ContentScale
|
||||||
|
import androidx.compose.ui.res.loadImageBitmap
|
||||||
|
import androidx.compose.ui.unit.DpSize
|
||||||
|
import androidx.compose.ui.unit.IntOffset
|
||||||
|
import androidx.compose.ui.unit.dp
|
||||||
|
import androidx.compose.ui.window.WindowPlacement
|
||||||
|
import androidx.compose.ui.window.WindowPosition
|
||||||
|
import androidx.compose.ui.window.rememberWindowState
|
||||||
|
import com.github.panpf.zoomimage.ZoomImage
|
||||||
|
import com.github.panpf.zoomimage.compose.ZoomState
|
||||||
|
import com.github.panpf.zoomimage.compose.rememberZoomState
|
||||||
|
import com.github.panpf.zoomimage.compose.zoom.*
|
||||||
|
import de.itkl.assetmanager.assetManagerModule
|
||||||
|
import de.itkl.core_api.coreApiModule
|
||||||
|
import de.itkl.httpClient.clients.MsOcr
|
||||||
|
import de.itkl.httpClient.httpClientModule
|
||||||
|
import de.itkl.textprocessing.CorpusFactory
|
||||||
|
import de.itkl.textprocessing.Document
|
||||||
|
import de.itkl.textprocessing.OcrPage
|
||||||
|
import de.itkl.textprocessing.textProcessingModule
|
||||||
|
import de.itkl.tui.tuiModule
|
||||||
|
import kotlinx.coroutines.Dispatchers
|
||||||
|
import kotlinx.coroutines.withContext
|
||||||
|
import org.pushingpixels.aurora.theming.auroraBackground
|
||||||
|
import org.pushingpixels.aurora.theming.marinerSkin
|
||||||
|
import org.pushingpixels.aurora.window.AuroraWindow
|
||||||
|
import org.pushingpixels.aurora.window.AuroraWindowTitlePaneConfigurations
|
||||||
|
import org.pushingpixels.aurora.window.auroraApplication
|
||||||
|
import java.io.File
|
||||||
|
import java.io.IOException
|
||||||
|
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||||
|
import kotlinx.coroutines.runBlocking
|
||||||
|
import org.koin.core.component.KoinComponent
|
||||||
|
import org.koin.core.component.inject
|
||||||
|
import org.koin.core.context.startKoin
|
||||||
|
import com.github.panpf.zoomimage.util.Logger as ZoomLogger
|
||||||
|
|
||||||
|
private val Log = KotlinLogging.logger { }
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Loads the sample document displayed by the viewer.
 *
 * The OCR client is obtained through Koin's `inject()`, so Koin has to be
 * started before [loadTestDocument] is called.
 */
class DocumentViewer : KoinComponent {
    // Resolved from Koin on first access.
    private val ocrExtractor: MsOcr by inject()

    /**
     * Loads the corpus at `assets/xs-reg`, takes the document `00001.jpg`,
     * processes it with the injected [MsOcr] client and returns it.
     */
    suspend fun loadTestDocument(): Document {
        val sample = CorpusFactory().load("assets/xs-reg").document("00001.jpg")
        sample.process(ocrExtractor)
        return sample
    }
}
|
||||||
|
|
||||||
|
/**
 * Application entry point: starts Koin, loads the sample document and opens
 * the Aurora window that displays it.
 *
 * Koin start-up and document loading run exactly once, before the Compose
 * application is launched. They must not live inside the composable content
 * lambda: that lambda may be re-executed on recomposition, which would call
 * `startKoin` a second time and repeat the blocking load on the UI thread.
 */
fun main() {
    startKoin {
        modules(
            coreApiModule,
            textProcessingModule,
            tuiModule,
            assetManagerModule,
            httpClientModule,
        )
    }

    // Blocking is fine here: no UI exists yet and nothing can be shown without the document.
    val document = runBlocking { DocumentViewer().loadTestDocument() }

    auroraApplication {
        val state = rememberWindowState(
            placement = WindowPlacement.Floating,
            position = WindowPosition.Aligned(Alignment.Center),
            size = DpSize(1000.dp, 800.dp),
        )
        AuroraWindow(
            skin = marinerSkin(),
            title = "Document Viewer",
            state = state,
            windowTitlePaneConfiguration = AuroraWindowTitlePaneConfigurations.AuroraPlain(),
            onCloseRequest = ::exitApplication,
        ) {
            viewImage(document)
        }
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Shows the sample page image with zoom support and draws the OCR word boxes
 * of [document]'s first OCR page on top of it.
 *
 * The OCR page is loaded asynchronously instead of with `runBlocking`, so
 * composition never blocks the UI thread. The load is keyed on [document] and
 * therefore restarts when a different document is passed in. Until the page is
 * available only the image is shown.
 */
@Composable
fun viewImage(document: Document) {
    val ocr by produceState<OcrPage?>(initialValue = null, document) {
        value = withContext(Dispatchers.IO) { document.retrieveOcrPages().first() }
    }
    Column(
        modifier = Modifier.fillMaxSize().auroraBackground()
    ) {
        val state = rememberZoomState(logger = ZoomLogger("zoom", level = ZoomLogger.INFO))
        // Debug read-out of the current zoom transform.
        Text("${state.zoomable.transform.scale} ${state.zoomable.transform.offset}")
        Box(
            modifier = Modifier.fillMaxSize()
        ) {
            ZoomedImage(
                state = state,
                load = { loadImageBitmap(File("assets/xs-reg/00001.jpg")) },
                painterFor = { remember { BitmapPainter(it) } },
                contentDescription = "Sample",
                modifier = Modifier.fillMaxSize()
            )
            // Overlay is drawn only once the OCR page has been loaded.
            ocr?.let { page -> canvas(state.zoomable, page) }
            // shapes(state.zoomable)
        }
    }
}
|
||||||
|
|
||||||
|
/**
 * Loads an image asynchronously and displays it in a [ZoomImage].
 *
 * @param state zoom state handed to [ZoomImage]
 * @param load suspending loader, executed on `Dispatchers.IO`
 * @param painterFor turns the loaded value into a [Painter]
 * @param contentDescription accessibility description of the image
 * @param modifier modifier applied to the [ZoomImage]
 * @param contentScale how the image is fitted into its bounds
 *
 * Nothing is rendered while the image is loading or if loading failed with an
 * [IOException].
 */
@Composable
fun <T> ZoomedImage(
    state: ZoomState,
    load: suspend () -> T,
    painterFor: @Composable (T) -> Painter,
    contentDescription: String,
    modifier: Modifier = Modifier,
    contentScale: ContentScale = ContentScale.Fit,
) {
    val image: T? by produceState<T?>(null) {
        value = withContext(Dispatchers.IO) {
            try {
                load()
            } catch (e: IOException) {
                // instead of printing to console, you can also write this to log,
                // or show some error placeholder
                e.printStackTrace()
                null
            }
        }
    }

    image?.let { loaded ->
        val scrollBar = remember {
            ScrollBarSpec(color = Color.Red, size = 6.dp, margin = 12.dp)
        }
        ZoomImage(
            painter = painterFor(loaded),
            contentDescription = contentDescription,
            contentScale = contentScale,
            modifier = modifier,
            scrollBar = scrollBar,
            state = state,
        )
    }
}
|
||||||
|
/** Decodes [file] into an [ImageBitmap]; the buffered file stream is closed afterwards. */
fun loadImageBitmap(file: File): ImageBitmap =
    file.inputStream().buffered().use { stream -> loadImageBitmap(stream) }
|
||||||
|
|
||||||
|
/**
 * Maps pixel coordinates of a document onto a canvas of a different size.
 *
 * A coordinate is first expressed as a fraction of the document extent
 * (`x / docWidth`) and then scaled to the canvas extent. The earlier version
 * divided the other way round (`docWidth / x`), which produced infinity for
 * `x = 0` and shrinking results for growing coordinates.
 *
 * @property docWidth width of the document in document pixels
 * @property docHeight height of the document in document pixels
 * @property canvasWidth width of the target canvas
 * @property canvasHeight height of the target canvas
 */
data class PointConverter(
    val docWidth: Int,
    val docHeight: Int,
    val canvasWidth: Float,
    val canvasHeight: Float,
) {
    /**
     * Converts the document x coordinate [x] into the matching canvas x coordinate.
     * The result is not finite when [docWidth] is 0.
     */
    fun convertX(x: Int): Float = canvasWidth * (x.toFloat() / docWidth)

    /**
     * Converts the document y coordinate [y] into the matching canvas y coordinate.
     * The result is not finite when [docHeight] is 0.
     */
    fun convertY(y: Int): Float = canvasHeight * (y.toFloat() / docHeight)
}
|
||||||
|
|
||||||
|
/**
 * Draws a red square that follows the zoom transform of [zoomableState].
 *
 * The square is anchored at the fixed point (288, 697), scaled by the current
 * zoom factors and shifted by the current pan offset; its edge length is
 * `100.dp` multiplied by the horizontal zoom factor.
 */
@OptIn(ExperimentalFoundationApi::class)
@Composable
fun shapes(zoomableState: ZoomableState) {
    Box(modifier = Modifier.fillMaxSize()) {
        val scaleX = zoomableState.transform.scaleX
        val scaleY = zoomableState.transform.scaleY
        val marker = Modifier
            .offset {
                val pan = zoomableState.transform.offset
                IntOffset(
                    (pan.x + 288 * scaleX).toInt(),
                    (pan.y + 697 * scaleY).toInt(),
                )
            }
            .clip(RectangleShape)
            .size(100.dp * scaleX)
            .background(Color.Red)
        Box(modifier = marker)
    }
}
|
||||||
|
/**
 * Draws a blue outline around every word of the OCR page [first], following
 * the current zoom transform of [zoomableState].
 *
 * Each word rectangle is scaled by the zoom factors and shifted by the pan
 * offset of the transform. The transform is read once per draw pass rather
 * than once per word. The previously constructed but never used
 * `PointConverter` (with a hard-coded page size) has been removed.
 */
@OptIn(ExperimentalComposeUiApi::class)
@Composable
fun canvas(zoomableState: ZoomableState, first: OcrPage) {
    Canvas(
        modifier = Modifier
            .fillMaxSize()
//            .onPointerEvent(PointerEventType.Move) {
//                val position = it.changes.first().position
//                println(position)
//            }
    ) {
        val transform = zoomableState.transform
        first.words.forEach { word ->
            val rect = word.rectangle
            drawRect(
                color = Color.Blue,
                topLeft = transform.offset + Offset(
                    rect.x.toFloat() * transform.scaleX,
                    rect.y.toFloat() * transform.scaleY,
                ),
                size = Size(
                    rect.width.toFloat() * transform.scaleX,
                    rect.height.toFloat() * transform.scaleY,
                ),
                style = Stroke(width = 5f),
            )
        }
    }
}
|
||||||
|
|
@ -1,3 +1,7 @@
|
||||||
project(":libraries").subprojects {
|
project(":libraries").subprojects {
|
||||||
apply(plugin = "docthor.kotlin-library-conventions")
|
apply(plugin = "docthor.kotlin-library-conventions")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
project(":apps").subprojects {
|
||||||
|
apply(plugin = "docthor.kotlin-application-conventions")
|
||||||
|
}
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
import gradle.kotlin.dsl.accessors._d9dcfd1a467b0b6fe90c5571a57aa558.api
|
||||||
import org.gradle.api.plugins.jvm.JvmTestSuite
|
import org.gradle.api.plugins.jvm.JvmTestSuite
|
||||||
import org.jetbrains.kotlin.gradle.dsl.JvmTarget
|
import org.jetbrains.kotlin.gradle.dsl.JvmTarget
|
||||||
|
|
||||||
|
|
@ -17,6 +18,7 @@ dependencies {
|
||||||
implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.6.2")
|
implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.6.2")
|
||||||
|
|
||||||
|
|
||||||
|
api("io.github.oshai:kotlin-logging-jvm:5.1.0")
|
||||||
testImplementation("io.insert-koin:koin-test:$koin_version")
|
testImplementation("io.insert-koin:koin-test:$koin_version")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,19 @@
|
||||||
|
[versions]
|
||||||
|
kotlin = "1.9.21"
|
||||||
|
coroutines = "1.7.3"
|
||||||
|
compose = "1.5.11"
|
||||||
|
dokka = "1.9.10"
|
||||||
|
batik = "1.17"
|
||||||
|
|
||||||
|
versionchecker = "0.50.0"
|
||||||
|
mavenpublish = "0.25.3"
|
||||||
|
|
||||||
|
[libraries]
|
||||||
|
compose-desktop = { module = "org.jetbrains.compose:compose-gradle-plugin", version.ref = "compose" }
|
||||||
|
kotlin-gradlePlugin = { module = "org.jetbrains.kotlin:kotlin-gradle-plugin", version.ref = "kotlin" }
|
||||||
|
kotlin-coroutines = { module = "org.jetbrains.kotlinx:kotlinx-coroutines-core", version.ref = "coroutines" }
|
||||||
|
dokka-gradlePlugin = { module = "org.jetbrains.dokka:dokka-gradle-plugin", version.ref = "dokka"}
|
||||||
|
batik = { module = "org.apache.xmlgraphics:batik-all", version.ref = "batik" }
|
||||||
|
|
||||||
|
versionchecker-gradlePlugin = { module = "com.github.ben-manes:gradle-versions-plugin", version.ref = "versionchecker" }
|
||||||
|
mavenpublish-gradlePlugin = { module = "com.vanniktech:gradle-maven-publish-plugin", version.ref = "mavenpublish" }
|
||||||
|
|
@ -0,0 +1,5 @@
|
||||||
|
dependencies {
|
||||||
|
api(project(":libraries:core-api"))
|
||||||
|
// used for contentType
|
||||||
|
api("io.ktor:ktor-http-jvm:2.3.7")
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,15 @@
|
||||||
|
package de.itkl.assetmanager
|
||||||
|
|
||||||
|
import de.itkl.assetmanager.implementation.AssetsFileProcessorBackend
|
||||||
|
import de.itkl.assetmanager.implementation.FilesystemAssetManager
|
||||||
|
import de.itkl.assetmanager.implementation.FilesystemProjectManager
|
||||||
|
import de.itkl.assetmanager.interfaces.AssetManager
|
||||||
|
import de.itkl.assetmanager.interfaces.ProjectManager
|
||||||
|
import de.itkl.core_api.interfaces.assets.FileProcessorBackend
|
||||||
|
import org.koin.dsl.module
|
||||||
|
|
||||||
|
/**
 * Koin module of the asset manager library. It registers the filesystem-backed
 * implementations of [FileProcessorBackend], [AssetManager] and [ProjectManager]
 * as singletons.
 */
val assetManagerModule = module {
    single<FileProcessorBackend> { AssetsFileProcessorBackend() }
    single<AssetManager> { FilesystemAssetManager() }
    single<ProjectManager> { FilesystemProjectManager() }
}
|
||||||
|
|
@ -0,0 +1,22 @@
|
||||||
|
package de.itkl.assetmanager.implementation
|
||||||
|
|
||||||
|
import de.itkl.core_api.interfaces.FileProcessor2
|
||||||
|
import de.itkl.core_api.interfaces.Resource
|
||||||
|
import de.itkl.core_api.interfaces.assets.Assets
|
||||||
|
import de.itkl.core_api.interfaces.assets.FileProcessorBackend
|
||||||
|
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||||
|
import org.koin.core.component.KoinComponent
|
||||||
|
|
||||||
|
private val Log = KotlinLogging.logger { }
|
||||||
|
/**
 * [FileProcessorBackend] that uses [Assets] as a cache for processor output:
 * a processor only runs when its output file is not yet among the assets.
 */
class AssetsFileProcessorBackend : FileProcessorBackend, KoinComponent {
    /**
     * Runs [fileProcessor] on [resource] and stores the result in [assets],
     * unless an asset named like the processor's output file already exists.
     */
    override suspend fun process(resource: Resource, assets: Assets, fileProcessor: FileProcessor2) {
        Log.debug { "Call processor '${fileProcessor.filename}' on $resource" }

        // Output already cached: nothing to do.
        if (assets.exists(fileProcessor.filename)) {
            Log.info { "${fileProcessor.filename} already exists on ${resource}. Skipping" }
            return
        }

        Log.info { "${fileProcessor.filename} does not yet exists for $resource" }
        assets.store(fileProcessor.process(resource))
    }
}
|
||||||
|
|
@ -0,0 +1,84 @@
|
||||||
|
package de.itkl.assetmanager.implementation
|
||||||
|
|
||||||
|
import de.itkl.assetmanager.interfaces.AssetManager
|
||||||
|
import de.itkl.core_api.interfaces.assets.Assets
|
||||||
|
import de.itkl.core_api.interfaces.Resource
|
||||||
|
import de.itkl.core_api.interfaces.ResourceFactory
|
||||||
|
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||||
|
import kotlinx.coroutines.Dispatchers
|
||||||
|
import kotlinx.coroutines.flow.FlowCollector
|
||||||
|
import kotlinx.coroutines.flow.emitAll
|
||||||
|
import kotlinx.coroutines.flow.map
|
||||||
|
import kotlinx.coroutines.stream.consumeAsFlow
|
||||||
|
import kotlinx.coroutines.withContext
|
||||||
|
import org.koin.core.component.KoinComponent
|
||||||
|
import org.koin.core.component.inject
|
||||||
|
import java.nio.file.Files
|
||||||
|
import java.nio.file.Path
|
||||||
|
import java.nio.file.Paths
|
||||||
|
import kotlin.io.path.deleteExisting
|
||||||
|
import kotlin.io.path.exists
|
||||||
|
import kotlin.io.path.outputStream
|
||||||
|
|
||||||
|
private val Log = KotlinLogging.logger { }
|
||||||
|
/**
 * [AssetManager] that keeps the assets of a document in a directory next to
 * the document file, named `<document file name>.assets.d`.
 */
class FilesystemAssetManager : AssetManager {
    /**
     * Returns the assets of the document at path [name], creating the assets
     * directory (and missing parents) if necessary.
     */
    override suspend fun assets(name: String): Assets {
        val path = createAssetsPath(name)
        withContext(Dispatchers.IO) {
            Files.createDirectories(path)
        }
        return FilesystemAssets(path)
    }

    /**
     * Deletes the assets directory of the document at path [name].
     *
     * NOTE(review): `Files.delete` refuses to remove a non-empty directory, so
     * this only succeeds once all assets have been deleted — confirm that this
     * is the intended contract.
     */
    override suspend fun delete(name: String) {
        val path = createAssetsPath(name)
        withContext(Dispatchers.IO) {
            Files.delete(path)
        }
    }

    /**
     * Builds the absolute path of the assets directory for the document [name].
     *
     * The directory is resolved as a sibling of the document. This also works
     * for names without a parent directory (where `parent` is null) and avoids
     * repeating the directory prefix for relative names such as `a/b/doc.jpg`.
     * For absolute names the result is the same as before.
     *
     * @throws IllegalArgumentException if [name] has no file name element (e.g. a root path)
     */
    private fun createAssetsPath(name: String): Path {
        val document = Paths.get(name)
        val fileName = requireNotNull(document.fileName) {
            "Cannot derive an assets directory from '$name'"
        }
        return document.resolveSibling("$fileName.assets.d").toAbsolutePath()
    }
}
|
||||||
|
|
||||||
|
/**
 * [Assets] stored as plain files inside [baseDir]; every asset is a file
 * named after the resource's filename.
 */
class FilesystemAssets(private val baseDir: Path) : Assets, KoinComponent {

    private val resourceFactory by inject<ResourceFactory>()

    /** Copies the content of [resource] into a file named `resource.filename` inside the base directory. */
    override suspend fun store(resource: Resource) {
        val target = baseDir.resolve(resource.filename)
        resource.read().use { input ->
            target.outputStream().use { sink ->
                withContext(Dispatchers.IO) { input.copyTo(sink) }
            }
        }
    }

    /** Returns the asset called [name] as a file resource, or `null` if no such file exists. */
    override suspend fun retrieve(name: String): Resource? {
        val candidate = baseDir.resolve(name)
        return if (candidate.exists()) {
            Log.debug { "Loading file at $candidate" }
            resourceFactory.file(candidate)
        } else {
            null
        }
    }

    /** Deletes the asset file called [name]; `deleteExisting` fails if the file is missing. */
    override suspend fun delete(name: String) {
        val target = baseDir.resolve(name)
        withContext(Dispatchers.IO) { target.deleteExisting() }
    }

    /** Emits one file resource per entry of the base directory to [collector]. */
    override suspend fun collect(collector: FlowCollector<Resource>) {
        val entries = withContext(Dispatchers.IO) { Files.list(baseDir).consumeAsFlow() }
        collector.emitAll(entries.map { entry -> resourceFactory.file(entry) })
    }
}
|
||||||
|
|
@ -0,0 +1,64 @@
|
||||||
|
package de.itkl.assetmanager.implementation
|
||||||
|
|
||||||
|
import de.itkl.assetmanager.interfaces.AssetManager
|
||||||
|
import de.itkl.core_api.interfaces.assets.Assets
|
||||||
|
import de.itkl.assetmanager.interfaces.Project
|
||||||
|
import de.itkl.assetmanager.interfaces.ProjectManager
|
||||||
|
import de.itkl.core_api.interfaces.Resource
|
||||||
|
import de.itkl.core_api.interfaces.ResourceFactory
|
||||||
|
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||||
|
import kotlinx.coroutines.Dispatchers
|
||||||
|
import kotlinx.coroutines.withContext
|
||||||
|
import org.koin.core.component.KoinComponent
|
||||||
|
import org.koin.core.component.inject
|
||||||
|
import java.nio.file.Paths
|
||||||
|
import kotlin.io.path.isDirectory
|
||||||
|
import kotlin.io.path.isRegularFile
|
||||||
|
import kotlin.io.path.listDirectoryEntries
|
||||||
|
|
||||||
|
private val Log = KotlinLogging.logger { }
|
||||||
|
|
||||||
|
/** [ProjectManager] that treats a directory on disk as a project. */
class FilesystemProjectManager : ProjectManager {
    /**
     * Loads the directory at path [name] as a project. The regular files
     * directly inside it become the project's documents, identified by their
     * absolute paths.
     *
     * @throws IllegalStateException if [name] is not a directory
     */
    override suspend fun load(name: String): Project {
        val root = Paths.get(name)
        check(root.isDirectory()) {
            "Currently only directories as corpora are supported"
        }

        val documentPaths = withContext(Dispatchers.IO) {
            root.listDirectoryEntries()
                .filter { entry -> entry.isRegularFile() }
                .map { entry -> entry.toAbsolutePath().toString() }
        }

        return FilesystemProject(
            name = name,
            displayName = root.fileName.toString(),
            documentNames = documentPaths,
        )
    }
}
|
||||||
|
|
||||||
|
/**
 * [Project] backed by a directory on disk.
 *
 * @property name path of the project directory
 * @property displayName human-readable project name
 * @property documentNames names of the documents that belong to the project
 */
class FilesystemProject(
    override val name: String,
    override val displayName: String,
    override val documentNames: List<String>,
) : Project, KoinComponent {

    private val basePath = Paths.get(name)

    private val assetManager: AssetManager by inject()
    private val resourceFactory: ResourceFactory by inject()

    /** Resolves [name] against the project directory and returns the absolute path as a string. */
    override fun resolveName(name: String): String =
        basePath.resolve(name).toAbsolutePath().toString()

    /** Delegates to the injected [AssetManager] to obtain the assets of [documentName]. */
    override suspend fun assets(documentName: String): Assets =
        assetManager.assets(documentName)

    /** Opens the file [name], resolved against the project directory, as a resource. */
    override suspend fun resource(name: String): Resource? {
        Log.debug { "Project: opening resource of name $name" }
        val location = basePath.resolve(name)
        return resourceFactory.file(location)
    }
}
|
||||||
|
|
@ -0,0 +1,11 @@
|
||||||
|
package de.itkl.assetmanager.interfaces
|
||||||
|
|
||||||
|
import de.itkl.core_api.interfaces.assets.Assets
|
||||||
|
|
||||||
|
/**
 * Manages the assets of a single document.
 */
interface AssetManager {
    /** Returns the [Assets] registered under [name]. */
    suspend fun assets(name: String): Assets

    /** Deletes the assets registered under [name]. */
    suspend fun delete(name: String)
}
|
||||||
|
|
@ -0,0 +1,18 @@
|
||||||
|
package de.itkl.assetmanager.interfaces
|
||||||
|
|
||||||
|
import de.itkl.core_api.interfaces.Resource
|
||||||
|
import de.itkl.core_api.interfaces.assets.Assets
|
||||||
|
|
||||||
|
/**
 * A set of documents, each of which can hold its own assets.
 */
interface Project {
    /** Identifier of the project. */
    val name: String

    /** Name of the project intended for display. */
    val displayName: String

    /** Names of the documents contained in the project. */
    val documentNames: List<String>

    /** Resolves [name] within this project and returns the resolved name. */
    fun resolveName(name: String): String

    /** Returns the assets that belong to the document [documentName]. */
    suspend fun assets(documentName: String): Assets

    /** Returns the resource called [name]; the return type allows `null`. */
    suspend fun resource(name: String): Resource?
}
|
||||||
|
|
@ -0,0 +1,5 @@
|
||||||
|
package de.itkl.assetmanager.interfaces
|
||||||
|
|
||||||
|
/** Entry point for obtaining [Project] instances. */
interface ProjectManager {
    /** Loads the project identified by [name]. */
    suspend fun load(name: String): Project
}
|
||||||
|
|
@ -1,3 +1,7 @@
|
||||||
|
plugins {
|
||||||
|
kotlin("plugin.serialization") version embeddedKotlinVersion
|
||||||
|
}
|
||||||
|
|
||||||
dependencies {
|
dependencies {
|
||||||
// used for contentType
|
// used for contentType
|
||||||
api("io.ktor:ktor-http-jvm:2.3.7")
|
api("io.ktor:ktor-http-jvm:2.3.7")
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,8 @@
|
||||||
package de.itkl.core_api
|
package de.itkl.core_api
|
||||||
|
|
||||||
import de.itkl.core_api.interfaces.NoopResourceReadDecorator
|
|
||||||
import de.itkl.core_api.interfaces.ResourceFactory
|
import de.itkl.core_api.interfaces.ResourceFactory
|
||||||
import de.itkl.core_api.interfaces.ResourceReadDecorator
|
|
||||||
import org.koin.dsl.module
|
import org.koin.dsl.module
|
||||||
|
|
||||||
val coreApiModule = module {
|
val coreApiModule = module {
|
||||||
single<ResourceFactory> { ResourceFactory()}
|
single<ResourceFactory> { ResourceFactory()}
|
||||||
single<ResourceReadDecorator> { NoopResourceReadDecorator() }
|
|
||||||
}
|
}
|
||||||
|
|
@ -1,8 +1,7 @@
|
||||||
package de.itkl.httpClient.clients
|
package de.itkl.core_api.dtos
|
||||||
|
|
||||||
|
|
||||||
import kotlinx.datetime.Instant
|
import kotlinx.datetime.Instant
|
||||||
import kotlinx.datetime.LocalDateTime
|
|
||||||
import kotlinx.serialization.SerialName
|
import kotlinx.serialization.SerialName
|
||||||
import kotlinx.serialization.Serializable
|
import kotlinx.serialization.Serializable
|
||||||
|
|
||||||
|
|
@ -0,0 +1,34 @@
|
||||||
|
package de.itkl.core_api.implementation
|
||||||
|
|
||||||
|
import de.itkl.core_api.interfaces.Resource
|
||||||
|
import io.ktor.http.*
|
||||||
|
import kotlinx.serialization.*
|
||||||
|
import kotlinx.serialization.json.Json
|
||||||
|
import kotlinx.serialization.json.encodeToStream
|
||||||
|
import java.io.File
|
||||||
|
import java.io.InputStream
|
||||||
|
import java.io.UnsupportedEncodingException
|
||||||
|
import java.nio.file.Path
|
||||||
|
|
||||||
|
/**
 * In-memory [Resource] whose content is the serialized form of [obj].
 *
 * Only `ContentType.Application.Json` is supported. The resource is not backed
 * by a file, so [length], [file] and [path] are always `null`.
 *
 * @param filename name under which the resource is exposed
 * @param contentType serialization format; must be JSON
 * @param obj value to serialize
 * @param serializer strategy used to serialize [obj]
 */
class SerializableResource<T : Any> @OptIn(ExperimentalSerializationApi::class) constructor(
    override val filename: String,
    override val contentType: ContentType,
    private val obj: T,
    private val serializer: SerializationStrategy<T>,
) : Resource {

    override val length: Long? = null
    override val file: File? = null
    override val path: Path? = null

    /** Serializes the object on every call and returns the text as a stream. */
    override fun read(): InputStream = serialize().byteInputStream()

    /**
     * Encodes the object according to [contentType].
     *
     * @throws UnsupportedEncodingException for any content type other than JSON
     */
    private fun serialize(): String {
        if (contentType != ContentType.Application.Json) {
            throw UnsupportedEncodingException("Sorry but $contentType is not supported for Resources")
        }
        return Json.encodeToString(serializer, obj)
    }
}
|
||||||
|
|
@ -2,8 +2,14 @@ package de.itkl.core_api.interfaces
|
||||||
|
|
||||||
import java.io.File
|
import java.io.File
|
||||||
import java.nio.file.Path
|
import java.nio.file.Path
|
||||||
|
import java.util.function.Consumer
|
||||||
|
|
||||||
interface FileProcessor {
|
interface FileProcessor {
|
||||||
fun willProduce(path: Path): Path
|
fun willProduce(path: Path): Path
|
||||||
suspend fun process(resource: Resource): File
|
suspend fun process(resource: Resource): File
|
||||||
}
|
}
|
||||||
|
|
||||||
|
interface FileProcessor2 {
|
||||||
|
val filename: String
|
||||||
|
suspend fun process(resource: Resource): Resource
|
||||||
|
}
|
||||||
|
|
@ -1,11 +1,15 @@
|
||||||
package de.itkl.core_api.interfaces
|
package de.itkl.core_api.interfaces
|
||||||
|
|
||||||
import io.ktor.http.*
|
import io.ktor.http.*
|
||||||
|
import kotlinx.serialization.DeserializationStrategy
|
||||||
|
import kotlinx.serialization.KSerializer
|
||||||
|
import kotlinx.serialization.json.Json
|
||||||
import org.koin.core.component.KoinComponent
|
import org.koin.core.component.KoinComponent
|
||||||
import org.koin.core.component.get
|
import org.koin.core.component.get
|
||||||
import java.io.File
|
import java.io.File
|
||||||
import java.io.InputStream
|
import java.io.InputStream
|
||||||
import java.nio.file.Path
|
import java.nio.file.Path
|
||||||
|
import kotlin.reflect.KClass
|
||||||
|
|
||||||
interface Resource {
|
interface Resource {
|
||||||
val filename: String
|
val filename: String
|
||||||
|
|
@ -15,8 +19,16 @@ interface Resource {
|
||||||
val file: File?
|
val file: File?
|
||||||
val path: Path?
|
val path: Path?
|
||||||
fun read(): InputStream
|
fun read(): InputStream
|
||||||
|
|
||||||
|
fun <T: Any> json(deserializer: DeserializationStrategy<T>): T {
|
||||||
|
val string = String(read().readAllBytes())
|
||||||
|
return Json.decodeFromString(deserializer, string)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Automatically adds koin injectable decorators to reading/writing
|
* Automatically adds koin injectable decorators to reading/writing
|
||||||
* operations
|
* operations
|
||||||
|
|
@ -24,11 +36,10 @@ interface Resource {
|
||||||
abstract class AbstractResource : Resource, KoinComponent {
|
abstract class AbstractResource : Resource, KoinComponent {
|
||||||
abstract fun doRead(): InputStream
|
abstract fun doRead(): InputStream
|
||||||
final override fun read(): InputStream {
|
final override fun read(): InputStream {
|
||||||
return length?.let { length ->
|
return doRead()
|
||||||
get<ResourceReadDecorator>().decorate(
|
}
|
||||||
length = length,
|
|
||||||
doRead()
|
override fun toString(): String {
|
||||||
)
|
return filename
|
||||||
} ?: doRead()
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -2,13 +2,31 @@ package de.itkl.core_api.interfaces
|
||||||
|
|
||||||
import de.itkl.core_api.implementation.FileResource
|
import de.itkl.core_api.implementation.FileResource
|
||||||
import de.itkl.core_api.implementation.ProgressResource
|
import de.itkl.core_api.implementation.ProgressResource
|
||||||
|
import de.itkl.core_api.implementation.SerializableResource
|
||||||
|
import io.ktor.http.*
|
||||||
|
import kotlinx.serialization.SerializationStrategy
|
||||||
import org.koin.core.component.KoinComponent
|
import org.koin.core.component.KoinComponent
|
||||||
import org.koin.core.component.inject
|
import org.koin.core.component.inject
|
||||||
import java.io.File
|
import java.io.File
|
||||||
|
import java.nio.file.Path
|
||||||
|
import java.nio.file.Paths
|
||||||
|
|
||||||
class ResourceFactory : KoinComponent {
|
class ResourceFactory : KoinComponent {
|
||||||
|
|
||||||
private val progressBarFactory by inject<ProgressBarFactory>()
|
private val progressBarFactory by inject<ProgressBarFactory>()
|
||||||
|
fun <T : Any> json(name: String, obj: T, serializationStrategy: SerializationStrategy<T>): Resource {
|
||||||
|
return SerializableResource<T>(
|
||||||
|
filename = name,
|
||||||
|
contentType = ContentType.Application.Json,
|
||||||
|
obj = obj,
|
||||||
|
serializer = serializationStrategy)
|
||||||
|
}
|
||||||
|
fun file(path: String): Resource {
|
||||||
|
return file(Paths.get(path))
|
||||||
|
}
|
||||||
|
fun file(path: Path): Resource {
|
||||||
|
return file(path.toFile())
|
||||||
|
}
|
||||||
fun file(file: File): Resource {
|
fun file(file: File): Resource {
|
||||||
val resource = FileResource(file)
|
val resource = FileResource(file)
|
||||||
return ProgressResource(resource, progressBarFactory)
|
return ProgressResource(resource, progressBarFactory)
|
||||||
|
|
|
||||||
|
|
@ -1,15 +1,3 @@
|
||||||
package de.itkl.core_api.interfaces
|
package de.itkl.core_api.interfaces
|
||||||
|
|
||||||
import java.io.InputStream
|
import java.io.InputStream
|
||||||
|
|
||||||
interface ResourceReadDecorator {
|
|
||||||
fun decorate(
|
|
||||||
length: Long,
|
|
||||||
inputStream: InputStream): InputStream
|
|
||||||
}
|
|
||||||
|
|
||||||
class NoopResourceReadDecorator : ResourceReadDecorator {
|
|
||||||
override fun decorate(length: Long, inputStream: InputStream): InputStream {
|
|
||||||
return inputStream
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -0,0 +1,15 @@
|
||||||
|
package de.itkl.core_api.interfaces.assets
|
||||||
|
|
||||||
|
import de.itkl.core_api.interfaces.Resource
|
||||||
|
import kotlinx.coroutines.flow.Flow
|
||||||
|
import java.util.function.Consumer
|
||||||
|
|
||||||
|
interface Assets : Flow<Resource> {
|
||||||
|
suspend fun store(resource: Resource)
|
||||||
|
suspend fun retrieve(name: String): Resource?
|
||||||
|
suspend fun delete(name: String)
|
||||||
|
|
||||||
|
suspend fun exists(name: String): Boolean {
|
||||||
|
return retrieve(name) != null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,16 @@
|
||||||
|
package de.itkl.core_api.interfaces.assets
|
||||||
|
|
||||||
|
import de.itkl.core_api.interfaces.FileProcessor
|
||||||
|
import de.itkl.core_api.interfaces.FileProcessor2
|
||||||
|
import de.itkl.core_api.interfaces.Resource
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Executes a [FileProcessor2] on a [Resource]. It decides if and when
|
||||||
|
* the [FileProcessor2.process] should be called and what should happen with the result
|
||||||
|
*/
|
||||||
|
interface FileProcessorBackend {
|
||||||
|
suspend fun process(
|
||||||
|
resource: Resource,
|
||||||
|
assets: Assets,
|
||||||
|
fileProcessor: FileProcessor2)
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,8 @@
|
||||||
|
package de.itkl.core_api.interfaces.data
|
||||||
|
|
||||||
|
import de.itkl.core_api.interfaces.FileProcessor
|
||||||
|
import de.itkl.core_api.interfaces.FileProcessor2
|
||||||
|
|
||||||
|
interface Processable {
|
||||||
|
suspend fun process(fileProcessor: FileProcessor2)
|
||||||
|
}
|
||||||
|
|
@ -1,18 +1,29 @@
|
||||||
package de.itkl.httpClient.clients
|
package de.itkl.httpClient.clients
|
||||||
|
|
||||||
|
import de.itkl.core_api.dtos.MsOcrResponse
|
||||||
|
import de.itkl.core_api.interfaces.FileProcessor
|
||||||
|
import de.itkl.core_api.interfaces.FileProcessor2
|
||||||
import de.itkl.core_api.interfaces.Resource
|
import de.itkl.core_api.interfaces.Resource
|
||||||
|
import de.itkl.core_api.interfaces.ResourceFactory
|
||||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||||
import io.ktor.client.*
|
import io.ktor.client.*
|
||||||
import io.ktor.client.call.*
|
import io.ktor.client.call.*
|
||||||
import io.ktor.client.request.*
|
import io.ktor.client.request.*
|
||||||
import io.ktor.client.statement.*
|
import io.ktor.client.statement.*
|
||||||
|
import io.ktor.client.utils.EmptyContent.contentType
|
||||||
import io.ktor.http.*
|
import io.ktor.http.*
|
||||||
|
import kotlinx.serialization.json.Json
|
||||||
import org.koin.core.component.KoinComponent
|
import org.koin.core.component.KoinComponent
|
||||||
import org.koin.core.component.inject
|
import org.koin.core.component.inject
|
||||||
|
import java.io.File
|
||||||
|
import java.nio.file.Path
|
||||||
|
import kotlin.io.path.nameWithoutExtension
|
||||||
|
import kotlin.io.path.writeText
|
||||||
|
|
||||||
private val Log = KotlinLogging.logger { }
|
private val Log = KotlinLogging.logger { }
|
||||||
class MsOcr: KoinComponent {
|
class MsOcr: KoinComponent, FileProcessor2 {
|
||||||
private val httpClient: HttpClient by inject()
|
private val httpClient: HttpClient by inject()
|
||||||
|
private val resourceFactory: ResourceFactory by inject()
|
||||||
|
|
||||||
suspend fun ocr(resource: Resource): MsOcrResponse {
|
suspend fun ocr(resource: Resource): MsOcrResponse {
|
||||||
val response = httpClient.post {
|
val response = httpClient.post {
|
||||||
|
|
@ -24,7 +35,13 @@ class MsOcr: KoinComponent {
|
||||||
contentType(resource.contentType)
|
contentType(resource.contentType)
|
||||||
setBody(resource.read())
|
setBody(resource.read())
|
||||||
}
|
}
|
||||||
println("got response: ${response.status} in ${response.responseTime}")
|
|
||||||
return response.body()
|
return response.body()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
override val filename = "ms-ocr.json"
|
||||||
|
|
||||||
|
override suspend fun process(resource: Resource): Resource {
|
||||||
|
val result = ocr(resource)
|
||||||
|
return resourceFactory.json(filename, result, MsOcrResponse.serializer())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -1,3 +0,0 @@
|
||||||
dependencies {
|
|
||||||
api(project(":libraries:core-api"))
|
|
||||||
}
|
|
||||||
|
|
@ -1,19 +0,0 @@
|
||||||
package de.itkl.io.implementation
|
|
||||||
|
|
||||||
import de.itkl.core_api.interfaces.Resource
|
|
||||||
import io.ktor.http.*
|
|
||||||
import java.io.File
|
|
||||||
import java.io.InputStream
|
|
||||||
|
|
||||||
class FileSystemResource(private val file: File) : Resource() {
|
|
||||||
override val filename: String
|
|
||||||
get() = file.name
|
|
||||||
override val contentType: ContentType
|
|
||||||
get() = ContentType.fromFilePath(file.path).first()
|
|
||||||
override val length: Long
|
|
||||||
get() = file.length()
|
|
||||||
|
|
||||||
override fun doRead(): InputStream {
|
|
||||||
return file.inputStream()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,9 +0,0 @@
|
||||||
package de.itkl.io
|
|
||||||
|
|
||||||
import de.itkl.core_api.interfaces.NoopResourceReadDecorator
|
|
||||||
import de.itkl.core_api.interfaces.ResourceReadDecorator
|
|
||||||
import org.koin.dsl.module
|
|
||||||
|
|
||||||
val ioModule = module {
|
|
||||||
single<ResourceReadDecorator> { NoopResourceReadDecorator() }
|
|
||||||
}
|
|
||||||
|
|
@ -1,6 +1,9 @@
|
||||||
dependencies {
|
dependencies {
|
||||||
api(project(":libraries:core-api"))
|
api(project(":libraries:core-api"))
|
||||||
api("org.apache.lucene:lucene-analysis-common:9.9.0")
|
api("org.apache.lucene:lucene-analysis-common:9.9.0")
|
||||||
|
api("io.github.piruin:geok:1.2.2")
|
||||||
|
api(project(":libraries:assetmanager"))
|
||||||
|
api("com.soywiz.korge:korge-foundation:5.1.0")
|
||||||
implementation("com.github.doyaaaaaken:kotlin-csv-jvm:1.9.2")
|
implementation("com.github.doyaaaaaken:kotlin-csv-jvm:1.9.2")
|
||||||
implementation("com.google.guava:guava:32.1.3-jre")
|
implementation("com.google.guava:guava:32.1.3-jre")
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,37 @@
|
||||||
|
package de.itkl.textprocessing
|
||||||
|
|
||||||
|
import de.itkl.assetmanager.interfaces.Project
|
||||||
|
import de.itkl.assetmanager.interfaces.ProjectManager
|
||||||
|
import de.itkl.core_api.interfaces.FileProcessor
|
||||||
|
import de.itkl.core_api.interfaces.ResourceFactory
|
||||||
|
import de.itkl.core_api.interfaces.data.Processable
|
||||||
|
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||||
|
import org.koin.core.component.KoinComponent
|
||||||
|
import org.koin.core.component.inject
|
||||||
|
import org.koin.java.KoinJavaComponent.inject
|
||||||
|
import java.nio.file.Paths
|
||||||
|
|
||||||
|
private val Log = KotlinLogging.logger { }
|
||||||
|
|
||||||
|
class CorpusFactory : KoinComponent {
|
||||||
|
private val projectManager: ProjectManager by inject()
|
||||||
|
suspend fun load(name: String): Corpus {
|
||||||
|
Log.info { "Open corpus at ${Paths.get(name).toAbsolutePath()}" }
|
||||||
|
return Corpus(projectManager.load(name)).apply {
|
||||||
|
Log.debug { "Found documents in corpus: ${this.documentNames.joinToString("\n")}" }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
class Corpus(private val project: Project): KoinComponent {
|
||||||
|
val displayName get() = project.displayName
|
||||||
|
val documentNames get() = project.documentNames
|
||||||
|
|
||||||
|
private val resourceFactory: ResourceFactory by inject()
|
||||||
|
|
||||||
|
suspend fun document(name: String): Document {
|
||||||
|
return Document(
|
||||||
|
project.resolveName(name),
|
||||||
|
listOf(project.resource(name)!!)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,4 +1,103 @@
|
||||||
package de.itkl.textprocessing
|
package de.itkl.textprocessing
|
||||||
|
|
||||||
class DocumentContainer {
|
import de.itkl.assetmanager.interfaces.AssetManager
|
||||||
|
import de.itkl.core_api.dtos.MsOcrResponse
|
||||||
|
import de.itkl.core_api.interfaces.FileProcessor
|
||||||
|
import de.itkl.core_api.interfaces.FileProcessor2
|
||||||
|
import de.itkl.core_api.interfaces.Resource
|
||||||
|
import de.itkl.core_api.interfaces.assets.Assets
|
||||||
|
import de.itkl.core_api.interfaces.assets.FileProcessorBackend
|
||||||
|
import de.itkl.core_api.interfaces.data.Processable
|
||||||
|
import korlibs.math.geom.Rectangle
|
||||||
|
import kotlinx.coroutines.flow.Flow
|
||||||
|
import kotlinx.coroutines.flow.asFlow
|
||||||
|
import kotlinx.coroutines.flow.filter
|
||||||
|
import me.piruin.geok.LatLng
|
||||||
|
import me.piruin.geok.geometry.Polygon
|
||||||
|
import org.koin.core.component.KoinComponent
|
||||||
|
import org.koin.core.component.inject
|
||||||
|
|
||||||
|
class Document(
|
||||||
|
val name: String,
|
||||||
|
val resources: List<Resource>
|
||||||
|
) : Processable, KoinComponent {
|
||||||
|
private val assetManager: AssetManager by inject()
|
||||||
|
private val fileProcessorBackend: FileProcessorBackend by inject()
|
||||||
|
suspend fun assets(): Assets {
|
||||||
|
return assetManager.assets(name)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Loads the extracted ocr pages. Note that not every pages
|
||||||
|
* needs to have ocr
|
||||||
|
*/
|
||||||
|
suspend fun retrieveOcrPages(): List<OcrPage> {
|
||||||
|
// TODO: How to identify the assets independently from their name?
|
||||||
|
val resource = checkNotNull(assets()
|
||||||
|
.retrieve("ms-ocr.json")) {
|
||||||
|
"Ocr for $name is not yet created"
|
||||||
|
}
|
||||||
|
val msOcrResponse = resource.json(MsOcrResponse.serializer())
|
||||||
|
return msOcrResponse.analyzeResult.readResults.map { toOcrPage(it) }
|
||||||
|
}
|
||||||
|
override suspend fun process(fileProcessor: FileProcessor2) {
|
||||||
|
fileProcessorBackend.process(
|
||||||
|
resources.first(),
|
||||||
|
assets(),
|
||||||
|
fileProcessor
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
private fun toOcrPage(readResult: MsOcrResponse.AnalyzeResult.ReadResult): OcrPage {
|
||||||
|
return OcrPage(
|
||||||
|
pageNumber = readResult.page,
|
||||||
|
width = readResult.width,
|
||||||
|
height = readResult.height,
|
||||||
|
words = readResult.lines.flatMap { line -> line.words.map { toOcrWord(it) } }
|
||||||
|
)
|
||||||
|
}
|
||||||
|
private fun toOcrWord(word: MsOcrResponse.AnalyzeResult.ReadResult.Line.Word): OcrPage.OcrWord {
|
||||||
|
val box = word.boundingBox
|
||||||
|
return OcrPage.OcrWord(
|
||||||
|
Rectangle(
|
||||||
|
x = box[0],
|
||||||
|
y = box[1],
|
||||||
|
width = box[2] - box[0],
|
||||||
|
height = box[7] - box[1]),
|
||||||
|
// polygon = Polygon(listOf(
|
||||||
|
// LatLng(box[0].toDouble(), box[1].toDouble()),
|
||||||
|
// LatLng(box[2].toDouble(), box[3].toDouble()),
|
||||||
|
// LatLng(box[4].toDouble(), box[5].toDouble()),
|
||||||
|
// LatLng(box[6].toDouble(), box[7].toDouble()),
|
||||||
|
// )),
|
||||||
|
text = word.text
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class OcrPage(
|
||||||
|
val width: Int,
|
||||||
|
val height: Int,
|
||||||
|
val pageNumber: Int,
|
||||||
|
val words: List<OcrWord>,
|
||||||
|
// val regions: List<DocumentRegion> = emptyList()
|
||||||
|
) {
|
||||||
|
// inner class DocumentRegion(
|
||||||
|
// private val polygon: Polygon,
|
||||||
|
// private val type: String,
|
||||||
|
// ) {
|
||||||
|
// fun words(): Flow<OcrWord> {
|
||||||
|
// return words
|
||||||
|
// .asFlow()
|
||||||
|
// .filter { word -> word.polygon.intersectionWith(polygon) != null }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
fun addOcrWord(rectangle: Rectangle, text: String): OcrWord {
|
||||||
|
return OcrWord(rectangle, text)
|
||||||
|
}
|
||||||
|
class OcrWord(
|
||||||
|
val rectangle: Rectangle,
|
||||||
|
val text: String
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
@ -1,11 +1,13 @@
|
||||||
//pluginManagement {
|
|
||||||
// includeBuild("build-logic")
|
|
||||||
//}
|
|
||||||
|
|
||||||
plugins {
|
plugins {
|
||||||
id("org.gradle.toolchains.foojay-resolver-convention") version "0.4.0"
|
id("org.gradle.toolchains.foojay-resolver-convention") version "0.4.0"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
rootProject.name = "docthor"
|
||||||
|
|
||||||
|
fun includeDirs(vararg paths: String) {
|
||||||
|
paths.forEach(this::includeDir)
|
||||||
|
}
|
||||||
|
|
||||||
fun includeDir(path: String) {
|
fun includeDir(path: String) {
|
||||||
file(path)
|
file(path)
|
||||||
.listFiles()!!
|
.listFiles()!!
|
||||||
|
|
@ -18,8 +20,9 @@ fun includeDir(path: String) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
rootProject.name = "docthor"
|
|
||||||
include(
|
include(
|
||||||
"app",
|
"app",
|
||||||
)
|
)
|
||||||
includeDir("libraries")
|
includeDirs(
|
||||||
|
"apps",
|
||||||
|
"libraries")
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue