Compare commits
14 Commits
9f3813a83a
...
5a3f4031d2
| Author | SHA1 | Date |
|---|---|---|
|
|
5a3f4031d2 | |
|
|
d23b4f472c | |
|
|
97b5444159 | |
|
|
2cab145008 | |
|
|
9ea725fc36 | |
|
|
accdfbca67 | |
|
|
7ed5a39bac | |
|
|
4ae5c3bf58 | |
|
|
949f87800a | |
|
|
ac412385bb | |
|
|
8ef054baa4 | |
|
|
a2483c85d7 | |
|
|
b4ab91e1db | |
|
|
c758d0b79d |
|
|
@ -1,4 +1,4 @@
|
|||
.gradle
|
||||
build
|
||||
.idea
|
||||
assets
|
||||
/assets
|
||||
|
|
@ -7,4 +7,5 @@
|
|||
start-page="docthor.md">
|
||||
|
||||
<toc-element topic="docthor.md"/>
|
||||
<toc-element topic="Snippets.md"/>
|
||||
</instance-profile>
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
# Snippets
|
||||
|
||||
## Scale a Shape alongside ZoomImage
|
||||
|
||||
|
||||
```kotlin
|
||||
|
||||
@Composable
|
||||
fun shapes(zoomableState: ZoomableState) {
|
||||
Box(modifier = Modifier.fillMaxSize()) {
|
||||
val scaleX = zoomableState.transform.scaleX
|
||||
val scaleY = zoomableState.transform.scaleY
|
||||
Box(
|
||||
modifier = Modifier
|
||||
.offset { IntOffset(
|
||||
((zoomableState.transform.offset.x + (288 * scaleX)) ).toInt(),
|
||||
((zoomableState.transform.offset.y + (697 * scaleY)) ).toInt()
|
||||
) }
|
||||
.clip(RectangleShape)
|
||||
.size(100.dp * scaleX)
|
||||
.background(Color.Red)
|
||||
)
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Scale a Canvas alongside ZoomImage
|
||||
|
||||
```kotlin
|
||||
drawRect(
|
||||
Color.Blue,
|
||||
topLeft = zoomableState.transform.offset + (Offset(288 * zoomableState.transform.scaleX,697 * zoomableState.transform.scaleY)),
|
||||
size = Size( (793 - 288)* zoomableState.transform.scaleX, (741 - 697) * zoomableState.transform.scaleY),
|
||||
style = Stroke(width = 5f)
|
||||
)
|
||||
```
|
||||
|
|
@ -11,6 +11,15 @@ Asset can be found under <path>memento:/mnt/wd/export/data</path>
|
|||
<def title="PDF Renderer for Compose">
|
||||
<a href="https://github.com/GRizzi91/bouquet">bouquet</a>
|
||||
</def>
|
||||
<def title="Moko Resource">
|
||||
<a href="https://github.com/icerockdev/moko-resources">Resource Management for Compose</a>
|
||||
</def>
|
||||
<def title="Aurora">
|
||||
<a href="https://github.com/kirill-grouchnikov/aurora">Building modern, elegant and fast desktop Compose applications</a>
|
||||
</def>
|
||||
<def title="Zoomimage">
|
||||
<a href="https://github.com/panpf/zoomimage">Zooming an Image</a>
|
||||
</def>
|
||||
</deflist>
|
||||
|
||||
## Modules - Libraries
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
|
||||
plugins {
|
||||
id("docthor.kotlin-application-conventions")
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ class ComputeIdf : CliktCommand() {
|
|||
.required()
|
||||
|
||||
override fun run() = runBlocking {
|
||||
TfIdfPipeline(force = true)
|
||||
TfIdfPipeline(force = false)
|
||||
.input(corpus)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,31 @@
|
|||
// Build script of the document viewer desktop application.
plugins {
    id("org.jetbrains.compose") version "1.5.11"
}

repositories {
    google()
}

dependencies {
    // Adds one `implementation` dependency per sibling module under :libraries.
    fun addProjects(vararg names: String) {
        for (moduleName in names) {
            implementation(project(":libraries:$moduleName"))
        }
    }

    addProjects(
        "assetmanager",
        "core-api",
        "textprocessing",
        "httpClient",
        "tui",
    )

    // Aurora: theming, components and window chrome.
    implementation("org.pushing-pixels:aurora-theming:1.3.0")
    implementation("org.pushing-pixels:aurora-component:1.3.0")
    implementation("org.pushing-pixels:aurora-window:1.3.0")

    // Compose runtime for the OS the build is running on.
    implementation(compose.desktop.currentOs)

    // ZoomImage: zoomable image component.
    implementation("io.github.panpf.zoomimage:zoomimage-compose:1.0.0-beta11")
    implementation("io.github.panpf.zoomimage:zoomimage-compose-desktop:1.0.0-beta11")
}
|
||||
|
|
@ -0,0 +1,235 @@
|
|||
package de.itkl.documentViewer
|
||||
|
||||
import androidx.compose.foundation.*
|
||||
import androidx.compose.foundation.layout.*
|
||||
import androidx.compose.material.Text
|
||||
import androidx.compose.runtime.*
|
||||
import androidx.compose.ui.Alignment
|
||||
import androidx.compose.ui.ExperimentalComposeUiApi
|
||||
import androidx.compose.ui.Modifier
|
||||
import androidx.compose.ui.draw.clip
|
||||
import androidx.compose.ui.geometry.Offset
|
||||
import androidx.compose.ui.geometry.Size
|
||||
import androidx.compose.ui.graphics.Color
|
||||
import androidx.compose.ui.graphics.ImageBitmap
|
||||
import androidx.compose.ui.graphics.RectangleShape
|
||||
import androidx.compose.ui.graphics.drawscope.Stroke
|
||||
import androidx.compose.ui.graphics.painter.BitmapPainter
|
||||
import androidx.compose.ui.graphics.painter.Painter
|
||||
import androidx.compose.ui.layout.ContentScale
|
||||
import androidx.compose.ui.res.loadImageBitmap
|
||||
import androidx.compose.ui.unit.DpSize
|
||||
import androidx.compose.ui.unit.IntOffset
|
||||
import androidx.compose.ui.unit.dp
|
||||
import androidx.compose.ui.window.WindowPlacement
|
||||
import androidx.compose.ui.window.WindowPosition
|
||||
import androidx.compose.ui.window.rememberWindowState
|
||||
import com.github.panpf.zoomimage.ZoomImage
|
||||
import com.github.panpf.zoomimage.compose.ZoomState
|
||||
import com.github.panpf.zoomimage.compose.rememberZoomState
|
||||
import com.github.panpf.zoomimage.compose.zoom.*
|
||||
import de.itkl.assetmanager.assetManagerModule
|
||||
import de.itkl.core_api.coreApiModule
|
||||
import de.itkl.httpClient.clients.MsOcr
|
||||
import de.itkl.httpClient.httpClientModule
|
||||
import de.itkl.textprocessing.CorpusFactory
|
||||
import de.itkl.textprocessing.Document
|
||||
import de.itkl.textprocessing.OcrPage
|
||||
import de.itkl.textprocessing.textProcessingModule
|
||||
import de.itkl.tui.tuiModule
|
||||
import kotlinx.coroutines.Dispatchers
|
||||
import kotlinx.coroutines.withContext
|
||||
import org.pushingpixels.aurora.theming.auroraBackground
|
||||
import org.pushingpixels.aurora.theming.marinerSkin
|
||||
import org.pushingpixels.aurora.window.AuroraWindow
|
||||
import org.pushingpixels.aurora.window.AuroraWindowTitlePaneConfigurations
|
||||
import org.pushingpixels.aurora.window.auroraApplication
|
||||
import java.io.File
|
||||
import java.io.IOException
|
||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||
import kotlinx.coroutines.runBlocking
|
||||
import org.koin.core.component.KoinComponent
|
||||
import org.koin.core.component.inject
|
||||
import org.koin.core.context.startKoin
|
||||
import com.github.panpf.zoomimage.util.Logger as ZoomLogger
|
||||
|
||||
private val Log = KotlinLogging.logger { }
|
||||
|
||||
|
||||
/**
 * Loads the hard-coded sample document used by the viewer.
 */
class DocumentViewer : KoinComponent {
    /**
     * Opens the corpus at `assets/xs-reg`, picks the document `00001.jpg`
     * and passes the Koin-provided [MsOcr] processor to its `process` call.
     *
     * @return the document after `process` has completed
     */
    suspend fun loadTestDocument(): Document {
        // Local delegate: the Koin lookup happens on first access, i.e. inside `process(...)`.
        val ocrExtractor: MsOcr by inject()
        return CorpusFactory()
            .load("assets/xs-reg")
            .document("00001.jpg")
            .also { loaded -> loaded.process(ocrExtractor) }
    }
}
|
||||
|
||||
/**
 * Application entry point.
 *
 * Dependency injection is started and the sample document is loaded BEFORE the
 * Aurora application is entered. The application content is a composable lambda
 * that may be re-executed on recomposition; starting Koin or blocking on the
 * document load in there would repeat that one-time work.
 */
fun main() {
    startKoin {
        modules(
            coreApiModule,
            textProcessingModule,
            tuiModule,
            assetManagerModule,
            httpClientModule,
        )
    }

    // One-time blocking load; nothing is on screen yet, so no UI is frozen by it.
    val document = runBlocking {
        DocumentViewer().loadTestDocument()
    }

    auroraApplication {
        val state = rememberWindowState(
            placement = WindowPlacement.Floating,
            position = WindowPosition.Aligned(Alignment.Center),
            size = DpSize(1000.dp, 800.dp)
        )
        AuroraWindow(
            skin = marinerSkin(),
            title = "Document Viewer",
            state = state,
            windowTitlePaneConfiguration = AuroraWindowTitlePaneConfigurations.AuroraPlain(),
            onCloseRequest = ::exitApplication
        ) {
            viewImage(document)
        }
    }
}
|
||||
|
||||
|
||||
/**
 * Shows the sample image in a zoomable view, with the current zoom transform
 * printed above it and the OCR word rectangles drawn on top.
 *
 * @param document document whose first OCR page supplies the rectangles
 */
@Composable
fun viewImage(document: Document) {
    // NOTE(review): this blocks the composing thread until the OCR pages are available.
    val firstPage = remember {
        runBlocking { document.retrieveOcrPages().first() }
    }

    Column(modifier = Modifier.fillMaxSize().auroraBackground()) {
        val state = rememberZoomState(logger = ZoomLogger("zoom", level = ZoomLogger.INFO))

        // Debug read-out of the current scale and offset.
        Text("${state.zoomable.transform.scale} ${state.zoomable.transform.offset}")

        Box(modifier = Modifier.fillMaxSize()) {
            ZoomedImage(
                state = state,
                load = { loadImageBitmap(File("assets/xs-reg/00001.jpg")) },
                painterFor = { bitmap -> remember { BitmapPainter(bitmap) } },
                contentDescription = "Sample",
                modifier = Modifier.fillMaxSize()
            )
            // Overlay that follows the image's zoom transform.
            canvas(state.zoomable, firstPage)
//            shapes(state.zoomable)
        }
    }
}
|
||||
|
||||
/**
 * Loads a value on [Dispatchers.IO] and, once it is available, displays it in a [ZoomImage].
 *
 * Nothing is displayed while the value is still loading or when loading failed
 * with an [IOException] (the stack trace is printed in that case).
 *
 * @param state zoom state shared with the caller
 * @param load suspending loader for the image source
 * @param painterFor turns the loaded value into a [Painter]
 * @param contentDescription description passed on to [ZoomImage]
 * @param modifier modifier passed on to [ZoomImage]
 * @param contentScale content scale passed on to [ZoomImage]
 */
@Composable
fun <T> ZoomedImage(
    state: ZoomState,
    load: suspend () -> T,
    painterFor: @Composable (T) -> Painter,
    contentDescription: String,
    modifier: Modifier = Modifier,
    contentScale: ContentScale = ContentScale.Fit,
) {
    val image: T? by produceState<T?>(initialValue = null) {
        value = withContext(Dispatchers.IO) {
            try {
                load()
            } catch (failure: IOException) {
                // instead of printing to console, you can also write this to log,
                // or show some error placeholder
                failure.printStackTrace()
                null
            }
        }
    }

    // `image` is a delegated property and cannot be smart-cast, so bind it locally.
    image?.let { loaded ->
        val scrollBar = remember {
            ScrollBarSpec(
                color = Color.Red,
                size = 6.dp,
                margin = 12.dp,
            )
        }
        ZoomImage(
            painter = painterFor(loaded),
            contentDescription = contentDescription,
            contentScale = contentScale,
            modifier = modifier,
            scrollBar = scrollBar,
            state = state
        )
    }
}
|
||||
/**
 * Decodes [file] into an [ImageBitmap]; the buffered input stream is closed afterwards.
 */
fun loadImageBitmap(file: File): ImageBitmap =
    file.inputStream().buffered().use { stream -> loadImageBitmap(stream) }
|
||||
|
||||
/**
 * Maps document coordinates onto a canvas of a different size.
 *
 * A coordinate is first expressed as a fraction of the document extent
 * (`x / docWidth`) and that fraction is then applied to the canvas extent.
 *
 * @property docWidth width of the document in document units
 * @property docHeight height of the document in document units
 * @property canvasWidth width of the target canvas
 * @property canvasHeight height of the target canvas
 */
data class PointConverter(
    val docWidth: Int,
    val docHeight: Int,
    val canvasWidth: Float,
    val canvasHeight: Float
) {
    /**
     * Converts a horizontal document coordinate into a canvas coordinate.
     *
     * @param x horizontal position in document units
     * @return the corresponding horizontal position on the canvas
     */
    fun convertX(x: Int): Float {
        // Relative position is x / docWidth (not docWidth / x, which also blew up at x == 0).
        val relative = x.toFloat() / docWidth
        return canvasWidth * relative
    }

    /**
     * Converts a vertical document coordinate into a canvas coordinate.
     *
     * @param y vertical position in document units
     * @return the corresponding vertical position on the canvas
     */
    fun convertY(y: Int): Float {
        val relative = y.toFloat() / docHeight
        return canvasHeight * relative
    }
}
|
||||
|
||||
/**
 * Draws a red square that moves and scales together with the zoomed content.
 *
 * The square is anchored at content position (288, 697); its edge length is
 * 100.dp multiplied by the horizontal scale.
 *
 * @param zoomableState zoom state whose transform is followed
 */
@OptIn(ExperimentalFoundationApi::class)
@Composable
fun shapes(zoomableState: ZoomableState) {
    Box(modifier = Modifier.fillMaxSize()) {
        val horizontalScale = zoomableState.transform.scaleX
        val verticalScale = zoomableState.transform.scaleY

        val marker = Modifier
            .offset {
                // The offset is read inside the lambda, exactly where the original read it.
                val left = zoomableState.transform.offset.x + (288 * horizontalScale)
                val top = zoomableState.transform.offset.y + (697 * verticalScale)
                IntOffset(left.toInt(), top.toInt())
            }
            .clip(RectangleShape)
            .size(100.dp * horizontalScale)
            .background(Color.Red)

        Box(modifier = marker)
    }
}
|
||||
/**
 * Draws a blue outline around every word of an OCR page, following the zoom transform.
 *
 * Each word rectangle is multiplied by the current scale and shifted by the
 * current offset of [zoomableState].
 *
 * @param zoomableState zoom state whose transform is applied to the rectangles
 * @param first OCR page whose words are outlined
 */
@OptIn(ExperimentalComposeUiApi::class)
@Composable
fun canvas(zoomableState: ZoomableState, first: OcrPage) {
    Canvas(
        modifier = Modifier
            .fillMaxSize()
//            .onPointerEvent(PointerEventType.Move) {
//                val position = it.changes.first().position
//                println(position)
//            }
    ) {
        // Read the transform once per draw pass instead of four times per word.
        val transform = zoomableState.transform

        first.words.forEach { word ->
            val rect = word.rectangle
            drawRect(
                Color.Blue,
                topLeft = transform.offset + Offset(
                    rect.x.toFloat() * transform.scaleX,
                    rect.y.toFloat() * transform.scaleY
                ),
                size = Size(
                    rect.width.toFloat() * transform.scaleX,
                    rect.height.toFloat() * transform.scaleY
                ),
                style = Stroke(width = 5f)
            )
        }
    }
}
|
||||
|
|
@ -1,3 +1,7 @@
|
|||
project(":libraries").subprojects {
|
||||
apply(plugin = "docthor.kotlin-library-conventions")
|
||||
}
|
||||
|
||||
project(":apps").subprojects {
|
||||
apply(plugin = "docthor.kotlin-application-conventions")
|
||||
}
|
||||
|
|
@ -1,3 +1,4 @@
|
|||
import gradle.kotlin.dsl.accessors._d9dcfd1a467b0b6fe90c5571a57aa558.api
|
||||
import org.gradle.api.plugins.jvm.JvmTestSuite
|
||||
import org.jetbrains.kotlin.gradle.dsl.JvmTarget
|
||||
|
||||
|
|
@ -17,6 +18,7 @@ dependencies {
|
|||
implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.6.2")
|
||||
|
||||
|
||||
api("io.github.oshai:kotlin-logging-jvm:5.1.0")
|
||||
testImplementation("io.insert-koin:koin-test:$koin_version")
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,19 @@
|
|||
[versions]
|
||||
kotlin = "1.9.21"
|
||||
coroutines = "1.7.3"
|
||||
compose = "1.5.11"
|
||||
dokka = "1.9.10"
|
||||
batik = "1.17"
|
||||
|
||||
versionchecker = "0.50.0"
|
||||
mavenpublish = "0.25.3"
|
||||
|
||||
[libraries]
|
||||
compose-desktop = { module = "org.jetbrains.compose:compose-gradle-plugin", version.ref = "compose" }
|
||||
kotlin-gradlePlugin = { module = "org.jetbrains.kotlin:kotlin-gradle-plugin", version.ref = "kotlin" }
|
||||
kotlin-coroutines = { module = "org.jetbrains.kotlinx:kotlinx-coroutines-core", version.ref = "coroutines" }
|
||||
dokka-gradlePlugin = { module = "org.jetbrains.dokka:dokka-gradle-plugin", version.ref = "dokka"}
|
||||
batik = { module = "org.apache.xmlgraphics:batik-all", version.ref = "batik" }
|
||||
|
||||
versionchecker-gradlePlugin = { module = "com.github.ben-manes:gradle-versions-plugin", version.ref = "versionchecker" }
|
||||
mavenpublish-gradlePlugin = { module = "com.vanniktech:gradle-maven-publish-plugin", version.ref = "mavenpublish" }
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
dependencies {
|
||||
api(project(":libraries:core-api"))
|
||||
// used for contentType
|
||||
api("io.ktor:ktor-http-jvm:2.3.7")
|
||||
}
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
package de.itkl.assetmanager
|
||||
|
||||
import de.itkl.assetmanager.implementation.AssetsFileProcessorBackend
|
||||
import de.itkl.assetmanager.implementation.FilesystemAssetManager
|
||||
import de.itkl.assetmanager.implementation.FilesystemProjectManager
|
||||
import de.itkl.assetmanager.interfaces.AssetManager
|
||||
import de.itkl.assetmanager.interfaces.ProjectManager
|
||||
import de.itkl.core_api.interfaces.assets.FileProcessorBackend
|
||||
import org.koin.dsl.module
|
||||
|
||||
/**
 * Koin module that binds the asset manager interfaces to their
 * filesystem-based implementations, each registered as a `single`.
 */
val assetManagerModule = module {
    // Project and asset access.
    single<ProjectManager> { FilesystemProjectManager() }
    single<AssetManager> { FilesystemAssetManager() }

    // Backend that decides whether a file processor has to run.
    single<FileProcessorBackend> { AssetsFileProcessorBackend() }
}
|
||||
|
|
@ -0,0 +1,22 @@
|
|||
package de.itkl.assetmanager.implementation
|
||||
|
||||
import de.itkl.core_api.interfaces.FileProcessor2
|
||||
import de.itkl.core_api.interfaces.Resource
|
||||
import de.itkl.core_api.interfaces.assets.Assets
|
||||
import de.itkl.core_api.interfaces.assets.FileProcessorBackend
|
||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||
import org.koin.core.component.KoinComponent
|
||||
|
||||
private val Log = KotlinLogging.logger { }
|
||||
/**
 * [FileProcessorBackend] that uses [Assets] as a cache: a processor only runs
 * when no asset named after the processor's `filename` exists yet.
 */
class AssetsFileProcessorBackend : FileProcessorBackend, KoinComponent {
    /**
     * Runs [fileProcessor] on [resource] and stores the result in [assets],
     * unless an asset with the processor's filename is already present.
     */
    override suspend fun process(resource: Resource, assets: Assets, fileProcessor: FileProcessor2) {
        Log.debug { "Call processor '${fileProcessor.filename}' on $resource" }

        // Guard clause: an existing asset means there is nothing to do.
        if (assets.exists(fileProcessor.filename)) {
            Log.info { "${fileProcessor.filename} already exists on ${resource}. Skipping" }
            return
        }

        Log.info { "${fileProcessor.filename} does not yet exists for $resource" }
        assets.store(fileProcessor.process(resource))
    }
}
|
||||
|
|
@ -0,0 +1,84 @@
|
|||
package de.itkl.assetmanager.implementation
|
||||
|
||||
import de.itkl.assetmanager.interfaces.AssetManager
|
||||
import de.itkl.core_api.interfaces.assets.Assets
|
||||
import de.itkl.core_api.interfaces.Resource
|
||||
import de.itkl.core_api.interfaces.ResourceFactory
|
||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||
import kotlinx.coroutines.Dispatchers
|
||||
import kotlinx.coroutines.flow.FlowCollector
|
||||
import kotlinx.coroutines.flow.emitAll
|
||||
import kotlinx.coroutines.flow.map
|
||||
import kotlinx.coroutines.stream.consumeAsFlow
|
||||
import kotlinx.coroutines.withContext
|
||||
import org.koin.core.component.KoinComponent
|
||||
import org.koin.core.component.inject
|
||||
import java.nio.file.Files
|
||||
import java.nio.file.Path
|
||||
import java.nio.file.Paths
|
||||
import kotlin.io.path.deleteExisting
|
||||
import kotlin.io.path.exists
|
||||
import kotlin.io.path.outputStream
|
||||
|
||||
private val Log = KotlinLogging.logger { }
|
||||
/**
 * [AssetManager] that keeps the assets of a document in a sibling directory
 * named `<document file name>.assets.d`.
 */
class FilesystemAssetManager: AssetManager {
    /**
     * Returns the assets of the document [name], creating the assets directory
     * (and missing parent directories) if necessary.
     */
    override suspend fun assets(name: String): Assets {
        val path = createAssetsPath(name)
        withContext(Dispatchers.IO) {
            Files.createDirectories(path)
        }
        return FilesystemAssets(path)
    }

    /**
     * Deletes the assets directory of the document [name].
     *
     * NOTE(review): `Files.delete` fails for a directory that still contains
     * entries or does not exist — confirm whether a recursive delete is intended.
     */
    override suspend fun delete(name: String) {
        val path = createAssetsPath(name)
        withContext(Dispatchers.IO) {
            Files.delete(path)
        }
    }

    /**
     * Builds the absolute path of the assets directory for [name].
     *
     * The path is made absolute first, so relative names work too: a bare
     * file name has no parent, and resolving the full relative name against
     * its own parent would repeat the directory prefix.
     */
    private fun createAssetsPath(name: String): Path {
        val documentPath = Paths.get(name).toAbsolutePath()
        return documentPath.resolveSibling("${documentPath.fileName}.assets.d")
    }
}
|
||||
|
||||
/**
 * [Assets] stored as plain files directly inside [baseDir].
 */
class FilesystemAssets(private val baseDir: Path) : Assets, KoinComponent {

    private val resourceFactory by inject<ResourceFactory>()

    /** Copies the content of [resource] into a file named after `resource.filename`. */
    override suspend fun store(resource: Resource) {
        val target = baseDir.resolve(resource.filename)
        resource.read().use { input ->
            target.outputStream().use { sink ->
                withContext(Dispatchers.IO) {
                    input.copyTo(sink)
                }
            }
        }
    }

    /** Returns the asset called [name], or `null` when no such file exists. */
    override suspend fun retrieve(name: String): Resource? {
        val destination = baseDir.resolve(name)
        return if (destination.exists()) {
            Log.debug { "Loading file at $destination" }
            resourceFactory.file(destination)
        } else {
            null
        }
    }

    /** Deletes the asset called [name]; `deleteExisting` throws when the file is missing. */
    override suspend fun delete(name: String) {
        val target = baseDir.resolve(name)
        withContext(Dispatchers.IO) {
            target.deleteExisting()
        }
    }

    /** Emits one [Resource] for every entry listed in [baseDir]. */
    override suspend fun collect(collector: FlowCollector<Resource>) {
        val entries = withContext(Dispatchers.IO) {
            Files.list(baseDir).consumeAsFlow()
        }
        val resources = entries.map { path -> resourceFactory.file(path) }
        collector.emitAll(resources)
    }
}
|
||||
|
|
@ -0,0 +1,64 @@
|
|||
package de.itkl.assetmanager.implementation
|
||||
|
||||
import de.itkl.assetmanager.interfaces.AssetManager
|
||||
import de.itkl.core_api.interfaces.assets.Assets
|
||||
import de.itkl.assetmanager.interfaces.Project
|
||||
import de.itkl.assetmanager.interfaces.ProjectManager
|
||||
import de.itkl.core_api.interfaces.Resource
|
||||
import de.itkl.core_api.interfaces.ResourceFactory
|
||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||
import kotlinx.coroutines.Dispatchers
|
||||
import kotlinx.coroutines.withContext
|
||||
import org.koin.core.component.KoinComponent
|
||||
import org.koin.core.component.inject
|
||||
import java.nio.file.Paths
|
||||
import kotlin.io.path.isDirectory
|
||||
import kotlin.io.path.isRegularFile
|
||||
import kotlin.io.path.listDirectoryEntries
|
||||
|
||||
private val Log = KotlinLogging.logger { }
|
||||
|
||||
/**
 * [ProjectManager] that treats a directory as a project and every regular file
 * directly inside it as a document.
 */
class FilesystemProjectManager : ProjectManager {
    /**
     * Loads the directory [name] as a project.
     *
     * @throws IllegalStateException when [name] is not a directory
     */
    override suspend fun load(name: String): Project {
        val root = Paths.get(name)
        check(root.isDirectory()) {
            "Currently only directories as corpora are supported"
        }

        // Document names are the absolute paths of the regular files in the directory.
        val documentNames = withContext(Dispatchers.IO) {
            root.listDirectoryEntries()
                .filter { entry -> entry.isRegularFile() }
                .map { entry -> entry.toAbsolutePath().toString() }
        }

        return FilesystemProject(
            name = name,
            displayName = root.fileName.toString(),
            documentNames = documentNames
        )
    }
}
|
||||
|
||||
/**
 * [Project] backed by a directory; [name] is the path of that directory.
 */
class FilesystemProject(
    override val name: String,
    override val displayName: String,
    override val documentNames: List<String>
) : Project, KoinComponent {

    private val basePath = Paths.get(name)

    private val assetManager: AssetManager by inject()
    private val resourceFactory: ResourceFactory by inject()

    /** Resolves [name] against the project directory and returns the absolute path as a string. */
    override fun resolveName(name: String): String =
        basePath.resolve(name).toAbsolutePath().toString()

    /** Delegates to the injected [AssetManager]. */
    override suspend fun assets(documentName: String): Assets =
        assetManager.assets(documentName)

    /** Creates a file resource for [name] resolved against the project directory. */
    override suspend fun resource(name: String): Resource? {
        Log.debug { "Project: opening resource of name $name" }
        val location = basePath.resolve(name)
        return resourceFactory.file(location)
    }
}
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
package de.itkl.assetmanager.interfaces
|
||||
|
||||
import de.itkl.core_api.interfaces.assets.Assets
|
||||
|
||||
/**
 * Manages the assets that belong to one document.
 */
interface AssetManager {
    /** Returns the [Assets] registered under [name]. */
    suspend fun assets(name: String): Assets

    /** Removes the assets registered under [name]. */
    suspend fun delete(name: String)
}
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
package de.itkl.assetmanager.interfaces
|
||||
|
||||
import de.itkl.core_api.interfaces.Resource
|
||||
import de.itkl.core_api.interfaces.assets.Assets
|
||||
|
||||
/**
 * A set of documents, each of which can hold its own assets.
 */
interface Project {
    /** Name the project was loaded with. */
    val name: String

    /** Name intended for display. */
    val displayName: String

    /** Names of the documents contained in this project. */
    val documentNames: List<String>

    /** Resolves [name] relative to this project. */
    fun resolveName(name: String): String

    /** Returns the assets of the document [documentName]. */
    suspend fun assets(documentName: String): Assets

    /** Opens the resource called [name]; the nullable type allows implementations to return `null`. */
    suspend fun resource(name: String): Resource?
}
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
package de.itkl.assetmanager.interfaces
|
||||
|
||||
/**
 * Entry point for obtaining a [Project].
 */
interface ProjectManager {
    /** Loads the project identified by [name]. */
    suspend fun load(name: String): Project
}
|
||||
|
|
@ -1,3 +1,7 @@
|
|||
plugins {
|
||||
kotlin("plugin.serialization") version embeddedKotlinVersion
|
||||
}
|
||||
|
||||
dependencies {
|
||||
// used for contentType
|
||||
api("io.ktor:ktor-http-jvm:2.3.7")
|
||||
|
|
|
|||
|
|
@ -1,11 +1,8 @@
|
|||
package de.itkl.core_api
|
||||
|
||||
import de.itkl.core_api.interfaces.NoopResourceReadDecorator
|
||||
import de.itkl.core_api.interfaces.ResourceFactory
|
||||
import de.itkl.core_api.interfaces.ResourceReadDecorator
|
||||
import org.koin.dsl.module
|
||||
|
||||
val coreApiModule = module {
|
||||
single<ResourceFactory> { ResourceFactory()}
|
||||
single<ResourceReadDecorator> { NoopResourceReadDecorator() }
|
||||
}
|
||||
|
|
@ -1,8 +1,7 @@
|
|||
package de.itkl.httpClient.clients
|
||||
package de.itkl.core_api.dtos
|
||||
|
||||
|
||||
import kotlinx.datetime.Instant
|
||||
import kotlinx.datetime.LocalDateTime
|
||||
import kotlinx.serialization.SerialName
|
||||
import kotlinx.serialization.Serializable
|
||||
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
package de.itkl.core_api.implementation
|
||||
|
||||
import de.itkl.core_api.interfaces.Resource
|
||||
import io.ktor.http.*
|
||||
import kotlinx.serialization.*
|
||||
import kotlinx.serialization.json.Json
|
||||
import kotlinx.serialization.json.encodeToStream
|
||||
import java.io.File
|
||||
import java.io.InputStream
|
||||
import java.io.UnsupportedEncodingException
|
||||
import java.nio.file.Path
|
||||
|
||||
/**
 * In-memory [Resource] whose content is [obj] serialized with [serializer].
 *
 * Only `ContentType.Application.Json` is supported; it has no backing file,
 * so [length], [file] and [path] are always `null`.
 */
class SerializableResource<T : Any> @OptIn(ExperimentalSerializationApi::class) constructor(
    override val filename: String,
    override val contentType: ContentType,
    private val obj: T,
    private val serializer: SerializationStrategy<T>
) : Resource {

    override val length: Long? = null
    override val file: File? = null
    override val path: Path? = null

    /** Serializes the object and returns the resulting text as a stream. */
    override fun read(): InputStream = serialize().byteInputStream()

    /**
     * Encodes [obj] according to [contentType].
     *
     * @throws UnsupportedEncodingException for every content type other than JSON
     */
    private fun serialize(): String =
        if (contentType == ContentType.Application.Json) {
            Json.encodeToString(serializer, obj)
        } else {
            throw UnsupportedEncodingException("Sorry but $contentType is not supported for Resources")
        }
}
|
||||
|
|
@ -2,8 +2,14 @@ package de.itkl.core_api.interfaces
|
|||
|
||||
import java.io.File
|
||||
import java.nio.file.Path
|
||||
import java.util.function.Consumer
|
||||
|
||||
/**
 * Processor that turns a [Resource] into a [File].
 */
interface FileProcessor {
    /** Returns the path this processor will produce for the input [path]. */
    fun willProduce(path: Path): Path

    /** Processes [resource] and returns the produced file. */
    suspend fun process(resource: Resource): File
}
|
||||
|
||||
/**
 * Processor that turns a [Resource] into another [Resource].
 */
interface FileProcessor2 {
    /** File name associated with this processor's output. */
    val filename: String

    /** Processes [resource] and returns the resulting resource. */
    suspend fun process(resource: Resource): Resource
}
|
||||
|
|
@ -1,11 +1,15 @@
|
|||
package de.itkl.core_api.interfaces
|
||||
|
||||
import io.ktor.http.*
|
||||
import kotlinx.serialization.DeserializationStrategy
|
||||
import kotlinx.serialization.KSerializer
|
||||
import kotlinx.serialization.json.Json
|
||||
import org.koin.core.component.KoinComponent
|
||||
import org.koin.core.component.get
|
||||
import java.io.File
|
||||
import java.io.InputStream
|
||||
import java.nio.file.Path
|
||||
import kotlin.reflect.KClass
|
||||
|
||||
interface Resource {
|
||||
val filename: String
|
||||
|
|
@ -15,8 +19,16 @@ interface Resource {
|
|||
val file: File?
|
||||
val path: Path?
|
||||
fun read(): InputStream
|
||||
|
||||
/**
 * Reads the whole resource as UTF-8 text and decodes it as JSON.
 *
 * The stream obtained from [read] is closed once its content has been read.
 *
 * @param deserializer strategy used to decode the JSON text
 * @return the decoded value
 */
fun <T: Any> json(deserializer: DeserializationStrategy<T>): T {
    // `use` closes the stream even when reading fails.
    val string = read().use { stream -> stream.readBytes().decodeToString() }
    return Json.decodeFromString(deserializer, string)
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Automatically adds koin injectable decorators to reading/writing
|
||||
* operations
|
||||
|
|
@ -24,11 +36,10 @@ interface Resource {
|
|||
abstract class AbstractResource : Resource, KoinComponent {
|
||||
abstract fun doRead(): InputStream
|
||||
final override fun read(): InputStream {
|
||||
return length?.let { length ->
|
||||
get<ResourceReadDecorator>().decorate(
|
||||
length = length,
|
||||
doRead()
|
||||
)
|
||||
} ?: doRead()
|
||||
return doRead()
|
||||
}
|
||||
|
||||
override fun toString(): String {
|
||||
return filename
|
||||
}
|
||||
}
|
||||
|
|
@ -2,13 +2,31 @@ package de.itkl.core_api.interfaces
|
|||
|
||||
import de.itkl.core_api.implementation.FileResource
|
||||
import de.itkl.core_api.implementation.ProgressResource
|
||||
import de.itkl.core_api.implementation.SerializableResource
|
||||
import io.ktor.http.*
|
||||
import kotlinx.serialization.SerializationStrategy
|
||||
import org.koin.core.component.KoinComponent
|
||||
import org.koin.core.component.inject
|
||||
import java.io.File
|
||||
import java.nio.file.Path
|
||||
import java.nio.file.Paths
|
||||
|
||||
class ResourceFactory : KoinComponent {
|
||||
|
||||
private val progressBarFactory by inject<ProgressBarFactory>()
|
||||
fun <T : Any> json(name: String, obj: T, serializationStrategy: SerializationStrategy<T>): Resource {
|
||||
return SerializableResource<T>(
|
||||
filename = name,
|
||||
contentType = ContentType.Application.Json,
|
||||
obj = obj,
|
||||
serializer = serializationStrategy)
|
||||
}
|
||||
fun file(path: String): Resource {
|
||||
return file(Paths.get(path))
|
||||
}
|
||||
fun file(path: Path): Resource {
|
||||
return file(path.toFile())
|
||||
}
|
||||
fun file(file: File): Resource {
|
||||
val resource = FileResource(file)
|
||||
return ProgressResource(resource, progressBarFactory)
|
||||
|
|
|
|||
|
|
@ -1,15 +1,3 @@
|
|||
package de.itkl.core_api.interfaces
|
||||
|
||||
import java.io.InputStream
|
||||
|
||||
interface ResourceReadDecorator {
|
||||
fun decorate(
|
||||
length: Long,
|
||||
inputStream: InputStream): InputStream
|
||||
}
|
||||
|
||||
class NoopResourceReadDecorator : ResourceReadDecorator {
|
||||
override fun decorate(length: Long, inputStream: InputStream): InputStream {
|
||||
return inputStream
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
package de.itkl.core_api.interfaces.assets
|
||||
|
||||
import de.itkl.core_api.interfaces.Resource
|
||||
import kotlinx.coroutines.flow.Flow
|
||||
import java.util.function.Consumer
|
||||
|
||||
/**
 * A collection of named [Resource]s that is also a [Flow] of those resources.
 */
interface Assets : Flow<Resource> {
    /** Stores [resource]. */
    suspend fun store(resource: Resource)

    /** Returns the resource called [name], or `null`. */
    suspend fun retrieve(name: String): Resource?

    /** Deletes the resource called [name]. */
    suspend fun delete(name: String)

    /** `true` when [retrieve] returns a resource for [name]. */
    suspend fun exists(name: String): Boolean = retrieve(name) != null
}
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
package de.itkl.core_api.interfaces.assets
|
||||
|
||||
import de.itkl.core_api.interfaces.FileProcessor
|
||||
import de.itkl.core_api.interfaces.FileProcessor2
|
||||
import de.itkl.core_api.interfaces.Resource
|
||||
|
||||
/**
 * Executes a [FileProcessor2] on a [Resource]. The backend decides if and when
 * [FileProcessor2.process] is called and what happens with its result.
 */
interface FileProcessorBackend {
    /**
     * @param resource input handed to the processor
     * @param assets asset collection the backend may consult or update
     * @param fileProcessor processor to execute
     */
    suspend fun process(
        resource: Resource,
        assets: Assets,
        fileProcessor: FileProcessor2
    )
}
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
package de.itkl.core_api.interfaces.data
|
||||
|
||||
import de.itkl.core_api.interfaces.FileProcessor
|
||||
import de.itkl.core_api.interfaces.FileProcessor2
|
||||
|
||||
/**
 * Something a [FileProcessor2] can be applied to.
 */
interface Processable {
    /** Applies [fileProcessor] to this object. */
    suspend fun process(fileProcessor: FileProcessor2)
}
|
||||
|
|
@ -1,18 +1,29 @@
|
|||
package de.itkl.httpClient.clients
|
||||
|
||||
import de.itkl.core_api.dtos.MsOcrResponse
|
||||
import de.itkl.core_api.interfaces.FileProcessor
|
||||
import de.itkl.core_api.interfaces.FileProcessor2
|
||||
import de.itkl.core_api.interfaces.Resource
|
||||
import de.itkl.core_api.interfaces.ResourceFactory
|
||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||
import io.ktor.client.*
|
||||
import io.ktor.client.call.*
|
||||
import io.ktor.client.request.*
|
||||
import io.ktor.client.statement.*
|
||||
import io.ktor.client.utils.EmptyContent.contentType
|
||||
import io.ktor.http.*
|
||||
import kotlinx.serialization.json.Json
|
||||
import org.koin.core.component.KoinComponent
|
||||
import org.koin.core.component.inject
|
||||
import java.io.File
|
||||
import java.nio.file.Path
|
||||
import kotlin.io.path.nameWithoutExtension
|
||||
import kotlin.io.path.writeText
|
||||
|
||||
private val Log = KotlinLogging.logger { }
|
||||
class MsOcr: KoinComponent {
|
||||
class MsOcr: KoinComponent, FileProcessor2 {
|
||||
private val httpClient: HttpClient by inject()
|
||||
private val resourceFactory: ResourceFactory by inject()
|
||||
|
||||
suspend fun ocr(resource: Resource): MsOcrResponse {
|
||||
val response = httpClient.post {
|
||||
|
|
@ -24,7 +35,13 @@ class MsOcr: KoinComponent {
|
|||
contentType(resource.contentType)
|
||||
setBody(resource.read())
|
||||
}
|
||||
println("got response: ${response.status} in ${response.responseTime}")
|
||||
return response.body()
|
||||
}
|
||||
|
||||
override val filename = "ms-ocr.json"
|
||||
|
||||
override suspend fun process(resource: Resource): Resource {
|
||||
val result = ocr(resource)
|
||||
return resourceFactory.json(filename, result, MsOcrResponse.serializer())
|
||||
}
|
||||
}
|
||||
|
|
@ -1,3 +0,0 @@
|
|||
dependencies {
|
||||
api(project(":libraries:core-api"))
|
||||
}
|
||||
|
|
@ -1,19 +0,0 @@
|
|||
package de.itkl.io.implementation
|
||||
|
||||
import de.itkl.core_api.interfaces.Resource
|
||||
import io.ktor.http.*
|
||||
import java.io.File
|
||||
import java.io.InputStream
|
||||
|
||||
class FileSystemResource(private val file: File) : Resource() {
|
||||
override val filename: String
|
||||
get() = file.name
|
||||
override val contentType: ContentType
|
||||
get() = ContentType.fromFilePath(file.path).first()
|
||||
override val length: Long
|
||||
get() = file.length()
|
||||
|
||||
override fun doRead(): InputStream {
|
||||
return file.inputStream()
|
||||
}
|
||||
}
|
||||
|
|
@ -1,9 +0,0 @@
|
|||
package de.itkl.io
|
||||
|
||||
import de.itkl.core_api.interfaces.NoopResourceReadDecorator
|
||||
import de.itkl.core_api.interfaces.ResourceReadDecorator
|
||||
import org.koin.dsl.module
|
||||
|
||||
/**
 * Koin module for the io library: registers a [NoopResourceReadDecorator]
 * as the single [ResourceReadDecorator] binding.
 */
val ioModule = module {
    single<ResourceReadDecorator> {
        NoopResourceReadDecorator()
    }
}
|
||||
|
|
@ -1,6 +1,9 @@
|
|||
// Dependencies of the text-processing library.
dependencies {
    // Exposed to consumers of this module.
    api(project(":libraries:core-api"))
    api("org.apache.lucene:lucene-analysis-common:9.9.0")
    api("io.github.piruin:geok:1.2.2")
    api(project(":libraries:assetmanager"))
    api("com.soywiz.korge:korge-foundation:5.1.0")

    // Internal to this module.
    implementation("com.github.doyaaaaaken:kotlin-csv-jvm:1.9.2")
    implementation("com.google.guava:guava:32.1.3-jre")
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,37 @@
|
|||
package de.itkl.textprocessing
|
||||
|
||||
import de.itkl.assetmanager.interfaces.Project
|
||||
import de.itkl.assetmanager.interfaces.ProjectManager
|
||||
import de.itkl.core_api.interfaces.FileProcessor
|
||||
import de.itkl.core_api.interfaces.ResourceFactory
|
||||
import de.itkl.core_api.interfaces.data.Processable
|
||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||
import org.koin.core.component.KoinComponent
|
||||
import org.koin.core.component.inject
|
||||
import org.koin.java.KoinJavaComponent.inject
|
||||
import java.nio.file.Paths
|
||||
|
||||
private val Log = KotlinLogging.logger { }
|
||||
|
||||
/**
 * Creates [Corpus] instances from projects loaded through the injected [ProjectManager].
 */
class CorpusFactory : KoinComponent {
    private val projectManager: ProjectManager by inject()

    /**
     * Loads the project called [name] and wraps it in a [Corpus].
     *
     * Logs the absolute path that [name] resolves to, and the names of the
     * documents found in the loaded corpus.
     */
    suspend fun load(name: String): Corpus {
        Log.info { "Open corpus at ${Paths.get(name).toAbsolutePath()}" }
        val corpus = Corpus(projectManager.load(name))
        Log.debug { "Found documents in corpus: ${corpus.documentNames.joinToString("\n")}" }
        return corpus
    }
}
|
||||
/**
 * A view on a [Project] that hands out its documents.
 *
 * @param project the project this corpus reads names and resources from
 */
class Corpus(private val project: Project) : KoinComponent {
    /** The display name of the underlying project. */
    val displayName get() = project.displayName

    /** The document names of the underlying project. */
    val documentNames get() = project.documentNames

    private val resourceFactory: ResourceFactory by inject()

    /**
     * Builds the [Document] called [name] from the project's resource of that name.
     *
     * @throws IllegalStateException if the project has no resource for [name]
     */
    suspend fun document(name: String): Document {
        val resolvedName = project.resolveName(name)
        // Fail with a descriptive message instead of a bare NullPointerException.
        val resource = checkNotNull(project.resource(name)) {
            "No resource found for document '$name'"
        }
        return Document(resolvedName, listOf(resource))
    }
}
|
||||
|
|
@ -1,4 +1,103 @@
|
|||
package de.itkl.textprocessing
|
||||
|
||||
class DocumentContainer {
|
||||
import de.itkl.assetmanager.interfaces.AssetManager
|
||||
import de.itkl.core_api.dtos.MsOcrResponse
|
||||
import de.itkl.core_api.interfaces.FileProcessor
|
||||
import de.itkl.core_api.interfaces.FileProcessor2
|
||||
import de.itkl.core_api.interfaces.Resource
|
||||
import de.itkl.core_api.interfaces.assets.Assets
|
||||
import de.itkl.core_api.interfaces.assets.FileProcessorBackend
|
||||
import de.itkl.core_api.interfaces.data.Processable
|
||||
import korlibs.math.geom.Rectangle
|
||||
import kotlinx.coroutines.flow.Flow
|
||||
import kotlinx.coroutines.flow.asFlow
|
||||
import kotlinx.coroutines.flow.filter
|
||||
import me.piruin.geok.LatLng
|
||||
import me.piruin.geok.geometry.Polygon
|
||||
import org.koin.core.component.KoinComponent
|
||||
import org.koin.core.component.inject
|
||||
|
||||
/**
 * A named document made up of one or more [Resource]s.
 *
 * Assets are looked up through the injected [AssetManager] under [name];
 * processing is delegated to the injected [FileProcessorBackend].
 */
class Document(
    val name: String,
    val resources: List<Resource>,
) : Processable, KoinComponent {
    private val assetManager: AssetManager by inject()
    private val fileProcessorBackend: FileProcessorBackend by inject()

    /** Returns the assets the [AssetManager] provides for this document's [name]. */
    suspend fun assets(): Assets = assetManager.assets(name)

    /**
     * Loads the extracted OCR pages. Note that not every page needs to have OCR.
     *
     * @throws IllegalStateException if the "ms-ocr.json" asset does not exist yet
     */
    suspend fun retrieveOcrPages(): List<OcrPage> {
        // TODO: How to identify the assets independently from their name?
        val resource = checkNotNull(assets().retrieve("ms-ocr.json")) {
            "Ocr for $name is not yet created"
        }
        val readResults = resource.json(MsOcrResponse.serializer()).analyzeResult.readResults
        return readResults.map { readResult -> toOcrPage(readResult) }
    }

    /**
     * Hands the first resource and this document's assets to the
     * [FileProcessorBackend], to be processed by [fileProcessor].
     */
    override suspend fun process(fileProcessor: FileProcessor2) {
        fileProcessorBackend.process(resources.first(), assets(), fileProcessor)
    }

    /** Converts one OCR read result into an [OcrPage], flattening the words of all its lines. */
    private fun toOcrPage(readResult: MsOcrResponse.AnalyzeResult.ReadResult): OcrPage {
        val pageWords = readResult.lines.flatMap { line ->
            line.words.map { word -> toOcrWord(word) }
        }
        return OcrPage(
            width = readResult.width,
            height = readResult.height,
            pageNumber = readResult.page,
            words = pageWords,
        )
    }

    /**
     * Converts one OCR word into an [OcrPage.OcrWord] with a rectangle built
     * from its bounding box.
     *
     * NOTE(review): the bounding box is presumably four (x, y) corner pairs
     * stored flat; only indices 0, 1, 2 and 7 are used here. Confirm against
     * the OCR response format.
     */
    private fun toOcrWord(word: MsOcrResponse.AnalyzeResult.ReadResult.Line.Word): OcrPage.OcrWord {
        val box = word.boundingBox
        val left = box[0]
        val top = box[1]
        val bounds = Rectangle(
            x = left,
            y = top,
            width = box[2] - left,
            height = box[7] - top,
        )
        return OcrPage.OcrWord(
            bounds,
//            polygon = Polygon(listOf(
//                LatLng(box[0].toDouble(), box[1].toDouble()),
//                LatLng(box[2].toDouble(), box[3].toDouble()),
//                LatLng(box[4].toDouble(), box[5].toDouble()),
//                LatLng(box[6].toDouble(), box[7].toDouble()),
//            )),
            text = word.text,
        )
    }
}
|
||||
|
||||
/**
 * The OCR data of a single page: its dimensions, its page number and the
 * words found on it.
 */
class OcrPage(
    val width: Int,
    val height: Int,
    val pageNumber: Int,
    val words: List<OcrWord>,
//    val regions: List<DocumentRegion> = emptyList()
) {
//    inner class DocumentRegion(
//        private val polygon: Polygon,
//        private val type: String,
//    ) {
//        fun words(): Flow<OcrWord> {
//            return words
//                .asFlow()
//                .filter { word -> word.polygon.intersectionWith(polygon) != null }
//        }
//    }

    /**
     * Creates an [OcrWord] from [rectangle] and [text].
     *
     * NOTE(review): despite its name, this only returns the new word; it is
     * not appended to [words].
     */
    fun addOcrWord(rectangle: Rectangle, text: String): OcrWord = OcrWord(rectangle, text)

    /** A word's [text] together with its [rectangle]. */
    class OcrWord(
        val rectangle: Rectangle,
        val text: String,
    )
}
|
||||
|
|
@ -1,11 +1,13 @@
|
|||
//pluginManagement {
|
||||
// includeBuild("build-logic")
|
||||
//}
|
||||
|
||||
plugins {
|
||||
id("org.gradle.toolchains.foojay-resolver-convention") version "0.4.0"
|
||||
}
|
||||
|
||||
rootProject.name = "docthor"
|
||||
|
||||
/** Calls [includeDir] for each of the given [paths], in order. */
fun includeDirs(vararg paths: String) {
    for (path in paths) {
        includeDir(path)
    }
}
|
||||
|
||||
fun includeDir(path: String) {
|
||||
file(path)
|
||||
.listFiles()!!
|
||||
|
|
@ -18,8 +20,9 @@ fun includeDir(path: String) {
|
|||
}
|
||||
}
|
||||
|
||||
rootProject.name = "docthor"
|
||||
include(
|
||||
"app",
|
||||
)
|
||||
includeDir("libraries")
|
||||
includeDirs(
|
||||
"apps",
|
||||
"libraries")
|
||||
|
|
|
|||
Loading…
Reference in New Issue