Compare commits

..

No commits in common. "5a3f4031d2fdcf806fb0ccb43ddf1ec858e1017b" and "9f3813a83abe26d9a0da9f3402ae0a233b100597" have entirely different histories.

38 changed files with 65 additions and 851 deletions

2
.gitignore vendored
View File

@ -1,4 +1,4 @@
.gradle .gradle
build build
.idea .idea
/assets assets

View File

@ -7,5 +7,4 @@
start-page="docthor.md"> start-page="docthor.md">
<toc-element topic="docthor.md"/> <toc-element topic="docthor.md"/>
<toc-element topic="Snippets.md"/>
</instance-profile> </instance-profile>

View File

@ -1,36 +0,0 @@
# Snippets
## Scale a Shape alongside ZoomImage
```kotlin
@Composable
fun shapes(zoomableState: ZoomableState) {
Box(modifier = Modifier.fillMaxSize()) {
val scaleX = zoomableState.transform.scaleX
val scaleY = zoomableState.transform.scaleY
Box(
modifier = Modifier
.offset { IntOffset(
((zoomableState.transform.offset.x + (288 * scaleX)) ).toInt(),
((zoomableState.transform.offset.y + (697 * scaleY)) ).toInt()
) }
.clip(RectangleShape)
.size(100.dp * scaleX)
.background(Color.Red)
)
}
}
```
## Scale a Canvas alongside ZoomImage
```kotlin
drawRect(
Color.Blue,
topLeft = zoomableState.transform.offset + (Offset(288 * zoomableState.transform.scaleX,697 * zoomableState.transform.scaleY)),
size = Size( (793 - 288)* zoomableState.transform.scaleX, (741 - 697) * zoomableState.transform.scaleY),
style = Stroke(width = 5f)
)
```

View File

@ -11,15 +11,6 @@ Asset can be found under <path>memento:/mnt/wd/export/data</path>
<def title="PDF Renderer for Compose"> <def title="PDF Renderer for Compose">
<a href="https://github.com/GRizzi91/bouquet">bouquet</a> <a href="https://github.com/GRizzi91/bouquet">bouquet</a>
</def> </def>
<def title="Moko Resource">
<a href="https://github.com/icerockdev/moko-resources">Resource management for Compose</a>
</def>
<def title="Aurora">
<a href="https://github.com/kirill-grouchnikov/aurora">Building modern, elegant and fast desktop Compose applications</a>
</def>
<def title="Zoomimage">
<a href="https://github.com/panpf/zoomimage">Zooming an Image</a>
</def>
</deflist> </deflist>
## Modules - Libraries ## Modules - Libraries

View File

@ -1,4 +1,3 @@
plugins { plugins {
id("docthor.kotlin-application-conventions") id("docthor.kotlin-application-conventions")
} }

View File

@ -23,7 +23,7 @@ class ComputeIdf : CliktCommand() {
.required() .required()
override fun run() = runBlocking { override fun run() = runBlocking {
TfIdfPipeline(force = false) TfIdfPipeline(force = true)
.input(corpus) .input(corpus)
} }
} }

View File

@ -1,31 +0,0 @@
// Build script for a Compose Desktop module that depends on the sibling library projects.
plugins {
    id("org.jetbrains.compose") version "1.5.11"
}

repositories {
    google()
}

dependencies {
    // Library modules living under the ":libraries" parent project.
    listOf(
        "assetmanager",
        "core-api",
        "textprocessing",
        "httpClient",
        "tui",
    ).forEach { module ->
        implementation(project(":libraries:$module"))
    }

    // Aurora artifacts share a single version.
    val auroraVersion = "1.3.0"
    implementation("org.pushing-pixels:aurora-theming:$auroraVersion")
    implementation("org.pushing-pixels:aurora-component:$auroraVersion")
    implementation("org.pushing-pixels:aurora-window:$auroraVersion")

    implementation(compose.desktop.currentOs)

    // Both zoomimage artifacts share a single version.
    val zoomImageVersion = "1.0.0-beta11"
    implementation("io.github.panpf.zoomimage:zoomimage-compose:$zoomImageVersion")
    implementation("io.github.panpf.zoomimage:zoomimage-compose-desktop:$zoomImageVersion")
}

View File

@ -1,235 +0,0 @@
package de.itkl.documentViewer
import androidx.compose.foundation.*
import androidx.compose.foundation.layout.*
import androidx.compose.material.Text
import androidx.compose.runtime.*
import androidx.compose.ui.Alignment
import androidx.compose.ui.ExperimentalComposeUiApi
import androidx.compose.ui.Modifier
import androidx.compose.ui.draw.clip
import androidx.compose.ui.geometry.Offset
import androidx.compose.ui.geometry.Size
import androidx.compose.ui.graphics.Color
import androidx.compose.ui.graphics.ImageBitmap
import androidx.compose.ui.graphics.RectangleShape
import androidx.compose.ui.graphics.drawscope.Stroke
import androidx.compose.ui.graphics.painter.BitmapPainter
import androidx.compose.ui.graphics.painter.Painter
import androidx.compose.ui.layout.ContentScale
import androidx.compose.ui.res.loadImageBitmap
import androidx.compose.ui.unit.DpSize
import androidx.compose.ui.unit.IntOffset
import androidx.compose.ui.unit.dp
import androidx.compose.ui.window.WindowPlacement
import androidx.compose.ui.window.WindowPosition
import androidx.compose.ui.window.rememberWindowState
import com.github.panpf.zoomimage.ZoomImage
import com.github.panpf.zoomimage.compose.ZoomState
import com.github.panpf.zoomimage.compose.rememberZoomState
import com.github.panpf.zoomimage.compose.zoom.*
import de.itkl.assetmanager.assetManagerModule
import de.itkl.core_api.coreApiModule
import de.itkl.httpClient.clients.MsOcr
import de.itkl.httpClient.httpClientModule
import de.itkl.textprocessing.CorpusFactory
import de.itkl.textprocessing.Document
import de.itkl.textprocessing.OcrPage
import de.itkl.textprocessing.textProcessingModule
import de.itkl.tui.tuiModule
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.withContext
import org.pushingpixels.aurora.theming.auroraBackground
import org.pushingpixels.aurora.theming.marinerSkin
import org.pushingpixels.aurora.window.AuroraWindow
import org.pushingpixels.aurora.window.AuroraWindowTitlePaneConfigurations
import org.pushingpixels.aurora.window.auroraApplication
import java.io.File
import java.io.IOException
import io.github.oshai.kotlinlogging.KotlinLogging
import kotlinx.coroutines.runBlocking
import org.koin.core.component.KoinComponent
import org.koin.core.component.inject
import org.koin.core.context.startKoin
import com.github.panpf.zoomimage.util.Logger as ZoomLogger
private val Log = KotlinLogging.logger { }
/**
 * Helper that prepares the hard-coded sample document shown by the viewer.
 */
class DocumentViewer : KoinComponent {
    /**
     * Loads the corpus at `assets/xs-reg`, picks `00001.jpg` from it and passes the
     * Koin-provided [MsOcr] to [Document.process] before returning the document.
     */
    suspend fun loadTestDocument(): Document {
        // Resolved lazily, i.e. only once the document is actually processed.
        val ocrExtractor: MsOcr by inject()
        return CorpusFactory()
            .load("assets/xs-reg")
            .document("00001.jpg")
            .also { loaded -> loaded.process(ocrExtractor) }
    }
}
/**
 * Application entry point.
 *
 * Starts Koin with all library modules, loads the sample document and then opens a
 * single Aurora window that renders it.
 */
fun main() {
    // Dependency-injection setup and the blocking document load are one-time side
    // effects. They are performed before entering the composable application content
    // so they cannot be repeated when that content is recomposed.
    startKoin {
        modules(
            coreApiModule,
            textProcessingModule,
            tuiModule,
            assetManagerModule,
            httpClientModule,
        )
    }
    val document = runBlocking {
        DocumentViewer().loadTestDocument()
    }

    auroraApplication {
        val state = rememberWindowState(
            placement = WindowPlacement.Floating,
            position = WindowPosition.Aligned(Alignment.Center),
            size = DpSize(1000.dp, 800.dp),
        )
        AuroraWindow(
            skin = marinerSkin(),
            title = "Document Viewer",
            state = state,
            windowTitlePaneConfiguration = AuroraWindowTitlePaneConfigurations.AuroraPlain(),
            onCloseRequest = ::exitApplication,
        ) {
            viewImage(document)
        }
    }
}
/**
 * Window content: a status line with the current zoom transform, the zoomable sample
 * image, and an overlay canvas drawn from the first OCR page of [document].
 *
 * The OCR pages are fetched with `runBlocking` inside `remember`, so the fetch blocks
 * the composing thread and its result is kept across recompositions.
 */
@Composable
fun viewImage(document: Document) {
    val firstPage = remember {
        runBlocking { document.retrieveOcrPages() }.first()
    }
    Column(modifier = Modifier.fillMaxSize().auroraBackground()) {
        val zoomState = rememberZoomState(logger = ZoomLogger("zoom", level = ZoomLogger.INFO))
        val transform = zoomState.zoomable.transform
        Text("${transform.scale} ${transform.offset}")
        Box(modifier = Modifier.fillMaxSize()) {
            ZoomedImage(
                state = zoomState,
                load = { loadImageBitmap(File("assets/xs-reg/00001.jpg")) },
                painterFor = { bitmap -> remember { BitmapPainter(bitmap) } },
                contentDescription = "Sample",
                modifier = Modifier.fillMaxSize(),
            )
            canvas(zoomState.zoomable, firstPage)
            // shapes(zoomState.zoomable)
        }
    }
}
/**
 * Loads an image asynchronously and shows it in a [ZoomImage] once it is available.
 *
 * @param state zoom state passed to [ZoomImage]
 * @param load suspending loader, executed on [Dispatchers.IO]
 * @param painterFor converts the loaded value into a [Painter]
 * @param contentDescription description forwarded to [ZoomImage]
 * @param modifier modifier forwarded to [ZoomImage]
 * @param contentScale content scale forwarded to [ZoomImage]
 *
 * Nothing is rendered while loading or when [load] fails with an [IOException];
 * the failure is reported through the file's logger.
 */
@Composable
fun <T> ZoomedImage(
    state: ZoomState,
    load: suspend () -> T,
    painterFor: @Composable (T) -> Painter,
    contentDescription: String,
    modifier: Modifier = Modifier,
    contentScale: ContentScale = ContentScale.Fit,
) {
    val image: T? by produceState<T?>(null) {
        value = withContext(Dispatchers.IO) {
            try {
                load()
            } catch (e: IOException) {
                // Log through the file-level logger instead of dumping to stderr.
                Log.error(e) { "Failed to load image '$contentDescription'" }
                null
            }
        }
    }

    // `image` is a delegated property and cannot be smart-cast; bind it once instead of using `!!`.
    val loaded = image ?: return

    val scrollBar = remember {
        ScrollBarSpec(
            color = Color.Red,
            size = 6.dp,
            margin = 12.dp,
        )
    }
    ZoomImage(
        painter = painterFor(loaded),
        contentDescription = contentDescription,
        contentScale = contentScale,
        modifier = modifier,
        scrollBar = scrollBar,
        state = state,
    )
}
/**
 * Decodes [file] into an [ImageBitmap] by reading it through a buffered stream.
 * The stream is closed when decoding finishes or fails.
 */
fun loadImageBitmap(file: File): ImageBitmap {
    return file.inputStream().buffered().use { stream -> loadImageBitmap(stream) }
}
/**
 * Maps document coordinates onto a canvas of a different size.
 *
 * A coordinate is first expressed as a fraction of the document extent
 * (`x / docWidth`, `y / docHeight`) and then multiplied by the canvas extent.
 * The previous implementation divided the other way round (`docWidth / x`),
 * which produced out-of-range values and Infinity for a coordinate of 0.
 *
 * @property docWidth width of the document in document units
 * @property docHeight height of the document in document units
 * @property canvasWidth width of the target canvas
 * @property canvasHeight height of the target canvas
 */
data class PointConverter(
    val docWidth: Int,
    val docHeight: Int,
    val canvasWidth: Float,
    val canvasHeight: Float,
) {
    /** Converts the horizontal document coordinate [x] to the canvas coordinate system. */
    fun convertX(x: Int): Float = canvasWidth * (x.toFloat() / docWidth)

    /** Converts the vertical document coordinate [y] to the canvas coordinate system. */
    fun convertY(y: Int): Float = canvasHeight * (y.toFloat() / docHeight)
}
/**
 * Draws a red box that follows the transform of [zoomableState].
 *
 * The box is anchored at the fixed point (288, 697), scaled by the current transform
 * and shifted by its offset; its size is `100.dp` multiplied by the horizontal scale.
 */
@OptIn(ExperimentalFoundationApi::class)
@Composable
fun shapes(zoomableState: ZoomableState) {
    Box(modifier = Modifier.fillMaxSize()) {
        val scaleX = zoomableState.transform.scaleX
        val scaleY = zoomableState.transform.scaleY
        val marker = Modifier
            .offset {
                // The offset is read inside the lambda, as in the original code.
                val origin = zoomableState.transform.offset
                IntOffset(
                    (origin.x + (288 * scaleX)).toInt(),
                    (origin.y + (697 * scaleY)).toInt(),
                )
            }
            .clip(RectangleShape)
            .size(100.dp * scaleX)
            .background(Color.Red)
        Box(modifier = marker)
    }
}
/**
 * Overlay that outlines every word of the OCR page [first] with a blue rectangle.
 *
 * Each word rectangle is multiplied by the current scale of [zoomableState] and
 * shifted by its offset before it is drawn.
 */
@OptIn(ExperimentalComposeUiApi::class)
@Composable
fun canvas(zoomableState: ZoomableState, first: OcrPage) {
    Canvas(modifier = Modifier
        .fillMaxSize()
//        .onPointerEvent(PointerEventType.Move) {
//            val position = it.changes.first().position
//            println(position)
//        }
    ) {
        // Read the transform once per draw pass instead of once per property access.
        val transform = zoomableState.transform
        first.words.forEach { word ->
            val rect = word.rectangle
            drawRect(
                Color.Blue,
                topLeft = transform.offset + Offset(
                    rect.x.toFloat() * transform.scaleX,
                    rect.y.toFloat() * transform.scaleY,
                ),
                size = Size(
                    rect.width.toFloat() * transform.scaleX,
                    rect.height.toFloat() * transform.scaleY,
                ),
                style = Stroke(width = 5f),
            )
        }
    }
}

View File

@ -1,7 +1,3 @@
project(":libraries").subprojects { project(":libraries").subprojects {
apply(plugin = "docthor.kotlin-library-conventions") apply(plugin = "docthor.kotlin-library-conventions")
} }
project(":apps").subprojects {
apply(plugin = "docthor.kotlin-application-conventions")
}

View File

@ -1,4 +1,3 @@
import gradle.kotlin.dsl.accessors._d9dcfd1a467b0b6fe90c5571a57aa558.api
import org.gradle.api.plugins.jvm.JvmTestSuite import org.gradle.api.plugins.jvm.JvmTestSuite
import org.jetbrains.kotlin.gradle.dsl.JvmTarget import org.jetbrains.kotlin.gradle.dsl.JvmTarget
@ -18,7 +17,6 @@ dependencies {
implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.6.2") implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.6.2")
api("io.github.oshai:kotlin-logging-jvm:5.1.0")
testImplementation("io.insert-koin:koin-test:$koin_version") testImplementation("io.insert-koin:koin-test:$koin_version")
} }

View File

@ -1,19 +0,0 @@
[versions]
kotlin = "1.9.21"
coroutines = "1.7.3"
compose = "1.5.11"
dokka = "1.9.10"
batik = "1.17"
versionchecker = "0.50.0"
mavenpublish = "0.25.3"
[libraries]
compose-desktop = { module = "org.jetbrains.compose:compose-gradle-plugin", version.ref = "compose" }
kotlin-gradlePlugin = { module = "org.jetbrains.kotlin:kotlin-gradle-plugin", version.ref = "kotlin" }
kotlin-coroutines = { module = "org.jetbrains.kotlinx:kotlinx-coroutines-core", version.ref = "coroutines" }
dokka-gradlePlugin = { module = "org.jetbrains.dokka:dokka-gradle-plugin", version.ref = "dokka"}
batik = { module = "org.apache.xmlgraphics:batik-all", version.ref = "batik" }
versionchecker-gradlePlugin = { module = "com.github.ben-manes:gradle-versions-plugin", version.ref = "versionchecker" }
mavenpublish-gradlePlugin = { module = "com.vanniktech:gradle-maven-publish-plugin", version.ref = "mavenpublish" }

View File

@ -1,5 +0,0 @@
dependencies {
api(project(":libraries:core-api"))
// used for contentType
api("io.ktor:ktor-http-jvm:2.3.7")
}

View File

@ -1,15 +0,0 @@
package de.itkl.assetmanager
import de.itkl.assetmanager.implementation.AssetsFileProcessorBackend
import de.itkl.assetmanager.implementation.FilesystemAssetManager
import de.itkl.assetmanager.implementation.FilesystemProjectManager
import de.itkl.assetmanager.interfaces.AssetManager
import de.itkl.assetmanager.interfaces.ProjectManager
import de.itkl.core_api.interfaces.assets.FileProcessorBackend
import org.koin.dsl.module
/**
 * Koin module of the asset manager library.
 *
 * Registers the filesystem-backed implementations as singletons for
 * [ProjectManager], [AssetManager] and [FileProcessorBackend].
 */
val assetManagerModule = module {
single<ProjectManager> { FilesystemProjectManager() }
single<AssetManager> { FilesystemAssetManager() }
single<FileProcessorBackend> { AssetsFileProcessorBackend() }
}

View File

@ -1,22 +0,0 @@
package de.itkl.assetmanager.implementation
import de.itkl.core_api.interfaces.FileProcessor2
import de.itkl.core_api.interfaces.Resource
import de.itkl.core_api.interfaces.assets.Assets
import de.itkl.core_api.interfaces.assets.FileProcessorBackend
import io.github.oshai.kotlinlogging.KotlinLogging
import org.koin.core.component.KoinComponent
private val Log = KotlinLogging.logger { }
/**
 * [FileProcessorBackend] that uses the given [Assets] as a cache: a processor is only
 * executed when no asset with the processor's filename exists yet.
 */
class AssetsFileProcessorBackend : FileProcessorBackend, KoinComponent {
    /**
     * Runs [fileProcessor] on [resource] and stores the result in [assets], unless an
     * asset named like the processor's output is already present.
     */
    override suspend fun process(resource: Resource, assets: Assets, fileProcessor: FileProcessor2) {
        Log.debug { "Call processor '${fileProcessor.filename}' on $resource" }

        // Guard clause: the result already exists, so there is nothing to do.
        if (assets.exists(fileProcessor.filename)) {
            Log.info { "${fileProcessor.filename} already exists on ${resource}. Skipping" }
            return
        }

        Log.info { "${fileProcessor.filename} does not yet exists for $resource" }
        assets.store(fileProcessor.process(resource))
    }
}

View File

@ -1,84 +0,0 @@
package de.itkl.assetmanager.implementation
import de.itkl.assetmanager.interfaces.AssetManager
import de.itkl.core_api.interfaces.assets.Assets
import de.itkl.core_api.interfaces.Resource
import de.itkl.core_api.interfaces.ResourceFactory
import io.github.oshai.kotlinlogging.KotlinLogging
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.flow.FlowCollector
import kotlinx.coroutines.flow.emitAll
import kotlinx.coroutines.flow.map
import kotlinx.coroutines.stream.consumeAsFlow
import kotlinx.coroutines.withContext
import org.koin.core.component.KoinComponent
import org.koin.core.component.inject
import java.nio.file.Files
import java.nio.file.Path
import java.nio.file.Paths
import kotlin.io.path.deleteExisting
import kotlin.io.path.exists
import kotlin.io.path.outputStream
private val Log = KotlinLogging.logger { }
/**
 * [AssetManager] that keeps the assets of a document in a directory next to the
 * document, named `<document file name>.assets.d`.
 */
class FilesystemAssetManager : AssetManager {
    /**
     * Returns the assets of the document [name], creating the assets directory
     * (including missing parents) when it does not exist yet.
     */
    override suspend fun assets(name: String): Assets {
        val path = createAssetsPath(name)
        withContext(Dispatchers.IO) {
            Files.createDirectories(path)
        }
        return FilesystemAssets(path)
    }

    /**
     * Deletes the assets directory of the document [name] via [Files.delete].
     */
    override suspend fun delete(name: String) {
        val path = createAssetsPath(name)
        withContext(Dispatchers.IO) {
            Files.delete(path)
        }
    }

    /**
     * Computes the absolute assets directory for the document path [name].
     *
     * Uses [Path.resolveSibling] so that the result is correct for absolute paths,
     * relative paths and bare file names alike. The previous `parent.resolve(name…)`
     * threw a NullPointerException for names without a parent and repeated the
     * parent directory for relative names.
     *
     * @throws IllegalArgumentException if [name] has no file name component
     */
    private fun createAssetsPath(name: String): Path {
        val document = Paths.get(name)
        val fileName = requireNotNull(document.fileName) {
            "Cannot derive an assets directory from '$name'"
        }
        return document.resolveSibling("$fileName.assets.d").toAbsolutePath()
    }
}
/**
 * [Assets] backed by the directory [baseDir]: every asset is a file inside that
 * directory, and collecting this flow emits one [Resource] per directory entry.
 */
class FilesystemAssets(private val baseDir: Path) : Assets, KoinComponent {
    private val resourceFactory by inject<ResourceFactory>()

    /** Copies the content of [resource] into a file named after it inside [baseDir]. */
    override suspend fun store(resource: Resource) {
        val target = baseDir.resolve(resource.filename)
        resource.read().use { input ->
            target.outputStream().use { sink ->
                withContext(Dispatchers.IO) {
                    input.copyTo(sink)
                }
            }
        }
    }

    /** Returns the asset called [name], or `null` when no such file exists. */
    override suspend fun retrieve(name: String): Resource? {
        val candidate = baseDir.resolve(name)
        return if (candidate.exists()) {
            Log.debug { "Loading file at $candidate" }
            resourceFactory.file(candidate)
        } else {
            null
        }
    }

    /** Deletes the asset called [name]; the underlying call fails if the file is missing. */
    override suspend fun delete(name: String) {
        val target = baseDir.resolve(name)
        withContext(Dispatchers.IO) {
            target.deleteExisting()
        }
    }

    /** Lists [baseDir] and emits every entry to [collector] as a [Resource]. */
    override suspend fun collect(collector: FlowCollector<Resource>) {
        val entries = withContext(Dispatchers.IO) {
            Files.list(baseDir).consumeAsFlow()
        }
        collector.emitAll(entries.map { entry -> resourceFactory.file(entry) })
    }
}

View File

@ -1,64 +0,0 @@
package de.itkl.assetmanager.implementation
import de.itkl.assetmanager.interfaces.AssetManager
import de.itkl.core_api.interfaces.assets.Assets
import de.itkl.assetmanager.interfaces.Project
import de.itkl.assetmanager.interfaces.ProjectManager
import de.itkl.core_api.interfaces.Resource
import de.itkl.core_api.interfaces.ResourceFactory
import io.github.oshai.kotlinlogging.KotlinLogging
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.withContext
import org.koin.core.component.KoinComponent
import org.koin.core.component.inject
import java.nio.file.Paths
import kotlin.io.path.isDirectory
import kotlin.io.path.isRegularFile
import kotlin.io.path.listDirectoryEntries
private val Log = KotlinLogging.logger { }
/**
 * [ProjectManager] that treats a directory as a project and every regular file
 * directly inside it as a document.
 */
class FilesystemProjectManager : ProjectManager {
    /**
     * Loads the directory [name] as a project.
     *
     * @throws IllegalStateException if [name] is not a directory
     */
    override suspend fun load(name: String): Project {
        val root = Paths.get(name)
        check(root.isDirectory()) {
            "Currently only directories as corpora are supported"
        }

        // Documents are identified by their absolute path.
        val documentNames = withContext(Dispatchers.IO) {
            root.listDirectoryEntries()
                .filter { entry -> entry.isRegularFile() }
                .map { entry -> entry.toAbsolutePath().toString() }
        }

        return FilesystemProject(
            name = name,
            displayName = root.fileName.toString(),
            documentNames = documentNames,
        )
    }
}
/**
 * [Project] rooted at the filesystem path [name].
 *
 * Document names are resolved against that path; assets come from the Koin-provided
 * [AssetManager] and resources from the Koin-provided [ResourceFactory].
 */
class FilesystemProject(
    override val name: String,
    override val displayName: String,
    override val documentNames: List<String>,
) : Project, KoinComponent {
    private val basePath = Paths.get(name)
    private val assetManager: AssetManager by inject()
    private val resourceFactory: ResourceFactory by inject()

    /** Resolves [name] against the project directory and returns the absolute path as a string. */
    override fun resolveName(name: String): String =
        basePath.resolve(name).toAbsolutePath().toString()

    /** Returns the assets that the asset manager holds for [documentName]. */
    override suspend fun assets(documentName: String): Assets =
        assetManager.assets(documentName)

    /** Opens the file [name] inside the project directory as a [Resource]. */
    override suspend fun resource(name: String): Resource? {
        Log.debug { "Project: opening resource of name $name" }
        val location = basePath.resolve(name)
        return resourceFactory.file(location)
    }
}

View File

@ -1,11 +0,0 @@
package de.itkl.assetmanager.interfaces
import de.itkl.core_api.interfaces.assets.Assets
/**
 * Manages the assets that belong to a single document.
 */
interface AssetManager {
/** Returns the [Assets] associated with the document identified by [name]. */
suspend fun assets(name: String): Assets
/** Deletes the assets associated with the document identified by [name]. */
suspend fun delete(name: String)
}

View File

@ -1,18 +0,0 @@
package de.itkl.assetmanager.interfaces
import de.itkl.core_api.interfaces.Resource
import de.itkl.core_api.interfaces.assets.Assets
/**
 * A set of documents. Each document can hold its own assets.
 */
interface Project {
/** Identifier of the project. */
val name: String
/** Name intended for display. */
val displayName: String
/** Names of the documents contained in this project. */
val documentNames: List<String>
/** Turns a document [name] into the project's resolved form of that name. */
fun resolveName(name: String): String
/** Returns the [Assets] of the document [documentName]. */
suspend fun assets(documentName: String): Assets
/** Returns the [Resource] called [name]; the signature allows `null` as a result. */
suspend fun resource(name: String): Resource?
}

View File

@ -1,5 +0,0 @@
package de.itkl.assetmanager.interfaces
/**
 * Entry point for obtaining [Project] instances.
 */
interface ProjectManager {
/** Loads the project identified by [name]. */
suspend fun load(name: String): Project
}

View File

@ -1,7 +1,3 @@
plugins {
kotlin("plugin.serialization") version embeddedKotlinVersion
}
dependencies { dependencies {
// used for contentType // used for contentType
api("io.ktor:ktor-http-jvm:2.3.7") api("io.ktor:ktor-http-jvm:2.3.7")

View File

@ -1,8 +1,11 @@
package de.itkl.core_api package de.itkl.core_api
import de.itkl.core_api.interfaces.NoopResourceReadDecorator
import de.itkl.core_api.interfaces.ResourceFactory import de.itkl.core_api.interfaces.ResourceFactory
import de.itkl.core_api.interfaces.ResourceReadDecorator
import org.koin.dsl.module import org.koin.dsl.module
val coreApiModule = module { val coreApiModule = module {
single<ResourceFactory> { ResourceFactory()} single<ResourceFactory> { ResourceFactory()}
single<ResourceReadDecorator> { NoopResourceReadDecorator() }
} }

View File

@ -1,34 +0,0 @@
package de.itkl.core_api.implementation
import de.itkl.core_api.interfaces.Resource
import io.ktor.http.*
import kotlinx.serialization.*
import kotlinx.serialization.json.Json
import kotlinx.serialization.json.encodeToStream
import java.io.File
import java.io.InputStream
import java.io.UnsupportedEncodingException
import java.nio.file.Path
/**
 * In-memory [Resource] whose content is produced by serializing [obj] on every read.
 *
 * Only [ContentType.Application.Json] is supported. The resource has no backing file,
 * so [length], [file] and [path] are always `null`.
 */
class SerializableResource<T : Any> @OptIn(ExperimentalSerializationApi::class) constructor(
    override val filename: String,
    override val contentType: ContentType,
    private val obj: T,
    private val serializer: SerializationStrategy<T>,
) : Resource {
    override val length: Long? = null
    override val file: File? = null
    override val path: Path? = null

    /** Serializes the wrapped object and exposes the text as a byte stream. */
    override fun read(): InputStream = serialize().byteInputStream()

    /**
     * Encodes [obj] according to [contentType].
     *
     * @throws UnsupportedEncodingException for any content type other than JSON
     */
    private fun serialize(): String {
        if (contentType != ContentType.Application.Json) {
            throw UnsupportedEncodingException("Sorry but $contentType is not supported for Resources")
        }
        return Json.encodeToString(serializer, obj)
    }
}

View File

@ -2,14 +2,8 @@ package de.itkl.core_api.interfaces
import java.io.File import java.io.File
import java.nio.file.Path import java.nio.file.Path
import java.util.function.Consumer
interface FileProcessor { interface FileProcessor {
fun willProduce(path: Path): Path fun willProduce(path: Path): Path
suspend fun process(resource: Resource): File suspend fun process(resource: Resource): File
} }
interface FileProcessor2 {
val filename: String
suspend fun process(resource: Resource): Resource
}

View File

@ -1,15 +1,11 @@
package de.itkl.core_api.interfaces package de.itkl.core_api.interfaces
import io.ktor.http.* import io.ktor.http.*
import kotlinx.serialization.DeserializationStrategy
import kotlinx.serialization.KSerializer
import kotlinx.serialization.json.Json
import org.koin.core.component.KoinComponent import org.koin.core.component.KoinComponent
import org.koin.core.component.get import org.koin.core.component.get
import java.io.File import java.io.File
import java.io.InputStream import java.io.InputStream
import java.nio.file.Path import java.nio.file.Path
import kotlin.reflect.KClass
interface Resource { interface Resource {
val filename: String val filename: String
@ -19,16 +15,8 @@ interface Resource {
val file: File? val file: File?
val path: Path? val path: Path?
fun read(): InputStream fun read(): InputStream
fun <T: Any> json(deserializer: DeserializationStrategy<T>): T {
val string = String(read().readAllBytes())
return Json.decodeFromString(deserializer, string)
}
} }
/** /**
* Automatically adds koin injectable decorators to reading/writing * Automatically adds koin injectable decorators to reading/writing
* operations * operations
@ -36,10 +24,11 @@ interface Resource {
abstract class AbstractResource : Resource, KoinComponent { abstract class AbstractResource : Resource, KoinComponent {
abstract fun doRead(): InputStream abstract fun doRead(): InputStream
final override fun read(): InputStream { final override fun read(): InputStream {
return doRead() return length?.let { length ->
} get<ResourceReadDecorator>().decorate(
length = length,
override fun toString(): String { doRead()
return filename )
} ?: doRead()
} }
} }

View File

@ -2,31 +2,13 @@ package de.itkl.core_api.interfaces
import de.itkl.core_api.implementation.FileResource import de.itkl.core_api.implementation.FileResource
import de.itkl.core_api.implementation.ProgressResource import de.itkl.core_api.implementation.ProgressResource
import de.itkl.core_api.implementation.SerializableResource
import io.ktor.http.*
import kotlinx.serialization.SerializationStrategy
import org.koin.core.component.KoinComponent import org.koin.core.component.KoinComponent
import org.koin.core.component.inject import org.koin.core.component.inject
import java.io.File import java.io.File
import java.nio.file.Path
import java.nio.file.Paths
class ResourceFactory : KoinComponent { class ResourceFactory : KoinComponent {
private val progressBarFactory by inject<ProgressBarFactory>() private val progressBarFactory by inject<ProgressBarFactory>()
fun <T : Any> json(name: String, obj: T, serializationStrategy: SerializationStrategy<T>): Resource {
return SerializableResource<T>(
filename = name,
contentType = ContentType.Application.Json,
obj = obj,
serializer = serializationStrategy)
}
fun file(path: String): Resource {
return file(Paths.get(path))
}
fun file(path: Path): Resource {
return file(path.toFile())
}
fun file(file: File): Resource { fun file(file: File): Resource {
val resource = FileResource(file) val resource = FileResource(file)
return ProgressResource(resource, progressBarFactory) return ProgressResource(resource, progressBarFactory)

View File

@ -1,3 +1,15 @@
package de.itkl.core_api.interfaces package de.itkl.core_api.interfaces
import java.io.InputStream import java.io.InputStream
interface ResourceReadDecorator {
fun decorate(
length: Long,
inputStream: InputStream): InputStream
}
class NoopResourceReadDecorator : ResourceReadDecorator {
override fun decorate(length: Long, inputStream: InputStream): InputStream {
return inputStream
}
}

View File

@ -1,15 +0,0 @@
package de.itkl.core_api.interfaces.assets
import de.itkl.core_api.interfaces.Resource
import kotlinx.coroutines.flow.Flow
import java.util.function.Consumer
/**
 * A named collection of [Resource]s that can also be collected as a [Flow] of its entries.
 */
interface Assets : Flow<Resource> {
/** Stores [resource] in this collection. */
suspend fun store(resource: Resource)
/** Returns the resource called [name], or `null` if there is none. */
suspend fun retrieve(name: String): Resource?
/** Removes the resource called [name]. */
suspend fun delete(name: String)
/** Returns `true` when [retrieve] finds a resource called [name]. */
suspend fun exists(name: String): Boolean {
return retrieve(name) != null
}
}

View File

@ -1,16 +0,0 @@
package de.itkl.core_api.interfaces.assets
import de.itkl.core_api.interfaces.FileProcessor
import de.itkl.core_api.interfaces.FileProcessor2
import de.itkl.core_api.interfaces.Resource
/**
 * Executes a [FileProcessor2] on a [Resource]. The backend decides if and when
 * [FileProcessor2.process] is called and what happens with the result.
 */
interface FileProcessorBackend {
/**
 * Applies [fileProcessor] to [resource], using [assets] as the place where
 * results may be looked up and kept.
 */
suspend fun process(
resource: Resource,
assets: Assets,
fileProcessor: FileProcessor2)
}

View File

@ -1,8 +0,0 @@
package de.itkl.core_api.interfaces.data
import de.itkl.core_api.interfaces.FileProcessor
import de.itkl.core_api.interfaces.FileProcessor2
/**
 * Something a [FileProcessor2] can be applied to.
 */
interface Processable {
/** Applies [fileProcessor] to this object. */
suspend fun process(fileProcessor: FileProcessor2)
}

View File

@ -1,29 +1,18 @@
package de.itkl.httpClient.clients package de.itkl.httpClient.clients
import de.itkl.core_api.dtos.MsOcrResponse
import de.itkl.core_api.interfaces.FileProcessor
import de.itkl.core_api.interfaces.FileProcessor2
import de.itkl.core_api.interfaces.Resource import de.itkl.core_api.interfaces.Resource
import de.itkl.core_api.interfaces.ResourceFactory
import io.github.oshai.kotlinlogging.KotlinLogging import io.github.oshai.kotlinlogging.KotlinLogging
import io.ktor.client.* import io.ktor.client.*
import io.ktor.client.call.* import io.ktor.client.call.*
import io.ktor.client.request.* import io.ktor.client.request.*
import io.ktor.client.statement.* import io.ktor.client.statement.*
import io.ktor.client.utils.EmptyContent.contentType
import io.ktor.http.* import io.ktor.http.*
import kotlinx.serialization.json.Json
import org.koin.core.component.KoinComponent import org.koin.core.component.KoinComponent
import org.koin.core.component.inject import org.koin.core.component.inject
import java.io.File
import java.nio.file.Path
import kotlin.io.path.nameWithoutExtension
import kotlin.io.path.writeText
private val Log = KotlinLogging.logger { } private val Log = KotlinLogging.logger { }
class MsOcr: KoinComponent, FileProcessor2 { class MsOcr: KoinComponent {
private val httpClient: HttpClient by inject() private val httpClient: HttpClient by inject()
private val resourceFactory: ResourceFactory by inject()
suspend fun ocr(resource: Resource): MsOcrResponse { suspend fun ocr(resource: Resource): MsOcrResponse {
val response = httpClient.post { val response = httpClient.post {
@ -35,13 +24,7 @@ class MsOcr: KoinComponent, FileProcessor2 {
contentType(resource.contentType) contentType(resource.contentType)
setBody(resource.read()) setBody(resource.read())
} }
println("got response: ${response.status} in ${response.responseTime}")
return response.body() return response.body()
} }
override val filename = "ms-ocr.json"
override suspend fun process(resource: Resource): Resource {
val result = ocr(resource)
return resourceFactory.json(filename, result, MsOcrResponse.serializer())
}
} }

View File

@ -1,7 +1,8 @@
package de.itkl.core_api.dtos package de.itkl.httpClient.clients
import kotlinx.datetime.Instant import kotlinx.datetime.Instant
import kotlinx.datetime.LocalDateTime
import kotlinx.serialization.SerialName import kotlinx.serialization.SerialName
import kotlinx.serialization.Serializable import kotlinx.serialization.Serializable

View File

@ -0,0 +1,3 @@
dependencies {
api(project(":libraries:core-api"))
}

View File

@ -0,0 +1,19 @@
package de.itkl.io.implementation
import de.itkl.core_api.interfaces.Resource
import io.ktor.http.*
import java.io.File
import java.io.InputStream
/**
 * [Resource] backed by a [File] on the local filesystem.
 */
class FileSystemResource(private val file: File) : Resource() {
    /** Name of the underlying file without its directory. */
    override val filename: String
        get() = file.name

    /**
     * Content type derived from the file path. Falls back to
     * `application/octet-stream` when no type is known for the path, instead of
     * throwing on an empty result list.
     */
    override val contentType: ContentType
        get() = ContentType.fromFilePath(file.path).firstOrNull()
            ?: ContentType.Application.OctetStream

    /** Current size of the underlying file in bytes. */
    override val length: Long
        get() = file.length()

    /** Opens a new input stream on the underlying file. */
    override fun doRead(): InputStream {
        return file.inputStream()
    }
}

View File

@ -0,0 +1,9 @@
package de.itkl.io
import de.itkl.core_api.interfaces.NoopResourceReadDecorator
import de.itkl.core_api.interfaces.ResourceReadDecorator
import org.koin.dsl.module
/**
 * Koin module of the io library: binds [ResourceReadDecorator] to the
 * singleton [NoopResourceReadDecorator].
 */
val ioModule = module {
single<ResourceReadDecorator> { NoopResourceReadDecorator() }
}

View File

@ -1,9 +1,6 @@
dependencies { dependencies {
api(project(":libraries:core-api")) api(project(":libraries:core-api"))
api("org.apache.lucene:lucene-analysis-common:9.9.0") api("org.apache.lucene:lucene-analysis-common:9.9.0")
api("io.github.piruin:geok:1.2.2")
api(project(":libraries:assetmanager"))
api("com.soywiz.korge:korge-foundation:5.1.0")
implementation("com.github.doyaaaaaken:kotlin-csv-jvm:1.9.2") implementation("com.github.doyaaaaaken:kotlin-csv-jvm:1.9.2")
implementation("com.google.guava:guava:32.1.3-jre") implementation("com.google.guava:guava:32.1.3-jre")
} }

View File

@ -1,37 +0,0 @@
package de.itkl.textprocessing
import de.itkl.assetmanager.interfaces.Project
import de.itkl.assetmanager.interfaces.ProjectManager
import de.itkl.core_api.interfaces.FileProcessor
import de.itkl.core_api.interfaces.ResourceFactory
import de.itkl.core_api.interfaces.data.Processable
import io.github.oshai.kotlinlogging.KotlinLogging
import org.koin.core.component.KoinComponent
import org.koin.core.component.inject
import org.koin.java.KoinJavaComponent.inject
import java.nio.file.Paths
private val Log = KotlinLogging.logger { }
/**
 * Creates [Corpus] instances from projects supplied by the Koin-provided [ProjectManager].
 */
class CorpusFactory : KoinComponent {
    private val projectManager: ProjectManager by inject()

    /**
     * Loads the project [name] and wraps it in a [Corpus]. The absolute location and
     * the discovered document names are written to the log.
     */
    suspend fun load(name: String): Corpus {
        Log.info { "Open corpus at ${Paths.get(name).toAbsolutePath()}" }
        val corpus = Corpus(projectManager.load(name))
        Log.debug { "Found documents in corpus: ${corpus.documentNames.joinToString("\n")}" }
        return corpus
    }
}
/**
 * View on a [Project] that hands out [Document]s.
 */
class Corpus(private val project: Project) : KoinComponent {
    /** Display name of the underlying project. */
    val displayName get() = project.displayName

    /** Names of all documents in the underlying project. */
    val documentNames get() = project.documentNames

    /**
     * Creates the [Document] called [name], consisting of the single project resource
     * of that name.
     *
     * @throws IllegalStateException if the project has no resource called [name]
     */
    suspend fun document(name: String): Document {
        val resolvedName = project.resolveName(name)
        // Fail with a descriptive message instead of a bare NullPointerException.
        val resource = checkNotNull(project.resource(name)) {
            "No resource named '$name' found in corpus '$displayName'"
        }
        return Document(resolvedName, listOf(resource))
    }
}

View File

@ -1,103 +1,4 @@
package de.itkl.textprocessing package de.itkl.textprocessing
import de.itkl.assetmanager.interfaces.AssetManager class DocumentContainer {
import de.itkl.core_api.dtos.MsOcrResponse
import de.itkl.core_api.interfaces.FileProcessor
import de.itkl.core_api.interfaces.FileProcessor2
import de.itkl.core_api.interfaces.Resource
import de.itkl.core_api.interfaces.assets.Assets
import de.itkl.core_api.interfaces.assets.FileProcessorBackend
import de.itkl.core_api.interfaces.data.Processable
import korlibs.math.geom.Rectangle
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.asFlow
import kotlinx.coroutines.flow.filter
import me.piruin.geok.LatLng
import me.piruin.geok.geometry.Polygon
import org.koin.core.component.KoinComponent
import org.koin.core.component.inject
class Document(
val name: String,
val resources: List<Resource>
) : Processable, KoinComponent {
private val assetManager: AssetManager by inject()
private val fileProcessorBackend: FileProcessorBackend by inject()
suspend fun assets(): Assets {
return assetManager.assets(name)
}
/**
* Loads the extracted ocr pages. Note that not every pages
* needs to have ocr
*/
suspend fun retrieveOcrPages(): List<OcrPage> {
// TODO: How to identify the assets independently from their name?
val resource = checkNotNull(assets()
.retrieve("ms-ocr.json")) {
"Ocr for $name is not yet created"
}
val msOcrResponse = resource.json(MsOcrResponse.serializer())
return msOcrResponse.analyzeResult.readResults.map { toOcrPage(it) }
}
override suspend fun process(fileProcessor: FileProcessor2) {
fileProcessorBackend.process(
resources.first(),
assets(),
fileProcessor
)
}
private fun toOcrPage(readResult: MsOcrResponse.AnalyzeResult.ReadResult): OcrPage {
return OcrPage(
pageNumber = readResult.page,
width = readResult.width,
height = readResult.height,
words = readResult.lines.flatMap { line -> line.words.map { toOcrWord(it) } }
)
}
private fun toOcrWord(word: MsOcrResponse.AnalyzeResult.ReadResult.Line.Word): OcrPage.OcrWord {
val box = word.boundingBox
return OcrPage.OcrWord(
Rectangle(
x = box[0],
y = box[1],
width = box[2] - box[0],
height = box[7] - box[1]),
// polygon = Polygon(listOf(
// LatLng(box[0].toDouble(), box[1].toDouble()),
// LatLng(box[2].toDouble(), box[3].toDouble()),
// LatLng(box[4].toDouble(), box[5].toDouble()),
// LatLng(box[6].toDouble(), box[7].toDouble()),
// )),
text = word.text
)
}
}
class OcrPage(
val width: Int,
val height: Int,
val pageNumber: Int,
val words: List<OcrWord>,
// val regions: List<DocumentRegion> = emptyList()
) {
// inner class DocumentRegion(
// private val polygon: Polygon,
// private val type: String,
// ) {
// fun words(): Flow<OcrWord> {
// return words
// .asFlow()
// .filter { word -> word.polygon.intersectionWith(polygon) != null }
// }
// }
fun addOcrWord(rectangle: Rectangle, text: String): OcrWord {
return OcrWord(rectangle, text)
}
class OcrWord(
val rectangle: Rectangle,
val text: String
)
} }

View File

@ -1,13 +1,11 @@
//pluginManagement {
// includeBuild("build-logic")
//}
plugins { plugins {
id("org.gradle.toolchains.foojay-resolver-convention") version "0.4.0" id("org.gradle.toolchains.foojay-resolver-convention") version "0.4.0"
} }
rootProject.name = "docthor"
fun includeDirs(vararg paths: String) {
paths.forEach(this::includeDir)
}
fun includeDir(path: String) { fun includeDir(path: String) {
file(path) file(path)
.listFiles()!! .listFiles()!!
@ -20,9 +18,8 @@ fun includeDir(path: String) {
} }
} }
rootProject.name = "docthor"
include( include(
"app", "app",
) )
includeDirs( includeDir("libraries")
"apps",
"libraries")