Compare commits

..

6 Commits

Author SHA1 Message Date
Timo Bryant 9f3813a83a starting with ms ocr client 2023-12-29 22:20:33 +01:00
Timo Bryant 30dc3b658d cleanup 2023-12-29 20:45:30 +01:00
Timo Bryant 6fb0ce2a4f move stuff to core-io/tui 2023-12-29 20:42:06 +01:00
Timo Bryant d62aadb95f move ProgressBarFactory to core api 2023-12-27 16:28:51 +01:00
Timo Bryant f777669dfa 7 Add TUI module 2023-12-27 16:16:34 +01:00
Timo Bryant cc727c681a adding core api 2023-12-27 16:11:12 +01:00
38 changed files with 398 additions and 123 deletions

View File

@ -0,0 +1,36 @@
<component name="ProjectRunConfigurationManager">
<configuration default="false" name="docthor [clean]" type="GradleRunConfiguration" factoryName="Gradle" nameIsGenerated="true">
<ExternalSystemSettings>
<option name="executionName" />
<option name="externalProjectPath" value="$PROJECT_DIR$" />
<option name="externalSystemIdString" value="GRADLE" />
<option name="scriptParameters" value="" />
<option name="taskDescriptions">
<list />
</option>
<option name="taskNames">
<list>
<option value="clean" />
</list>
</option>
<option name="vmOptions" />
</ExternalSystemSettings>
<ExternalSystemDebugServerProcess>true</ExternalSystemDebugServerProcess>
<ExternalSystemReattachDebugProcess>true</ExternalSystemReattachDebugProcess>
<EXTENSION ID="com.intellij.execution.ExternalSystemRunConfigurationJavaExtension">
<extension name="net.ashald.envfile">
<option name="IS_ENABLED" value="false" />
<option name="IS_SUBST" value="false" />
<option name="IS_PATH_MACRO_SUPPORTED" value="false" />
<option name="IS_IGNORE_MISSING_FILES" value="false" />
<option name="IS_ENABLE_EXPERIMENTAL_INTEGRATIONS" value="false" />
<ENTRIES>
<ENTRY IS_ENABLED="true" PARSER="runconfig" IS_EXECUTABLE="false" />
</ENTRIES>
</extension>
</EXTENSION>
<DebugAllEnabled>false</DebugAllEnabled>
<RunAsTest>false</RunAsTest>
<method v="2" />
</configuration>
</component>

View File

@ -4,7 +4,7 @@
<instance-profile id="d" <instance-profile id="d"
name="Docthor" name="Docthor"
start-page="starter-topic.md"> start-page="docthor.md">
<toc-element topic="starter-topic.md"/> <toc-element topic="docthor.md"/>
</instance-profile> </instance-profile>

View File

@ -21,4 +21,12 @@ All libraries should be placed under <path>libraries</path>
<def title="io"> <def title="io">
Abstraction about reading/writing to resources (filesystem, http, s3, etc pp) Abstraction about reading/writing to resources (filesystem, http, s3, etc pp)
</def> </def>
<def title="core-api">
Defines the core interfaces
</def>
<def title="tui">
Provides TUI (terminal UI) capabilities. When applied as a Koin module,
resources automatically print a read/write progress bar
on the terminal.
</def>
</deflist> </deflist>

View File

@ -4,6 +4,7 @@ plugins {
dependencies { dependencies {
implementation(project(":libraries:tfidf")) implementation(project(":libraries:tfidf"))
implementation(project(":libraries:tui"))
} }
application { application {

View File

@ -6,15 +6,13 @@ import com.github.ajalt.clikt.parameters.options.option
import com.github.ajalt.clikt.parameters.options.required import com.github.ajalt.clikt.parameters.options.required
import com.github.ajalt.clikt.parameters.types.enum import com.github.ajalt.clikt.parameters.types.enum
import com.github.ajalt.clikt.parameters.types.file import com.github.ajalt.clikt.parameters.types.file
import de.itkl.fileprocessing.ProgressBarFactory import de.itkl.core_api.coreApiModule
import de.itkl.textprocessing.textProcessingModule import de.itkl.textprocessing.textProcessingModule
import de.itkl.tfidf.Language import de.itkl.tfidf.Language
import de.itkl.tfidf.TerminalProgressBarFactory
//import de.itkl.tfidf.TfIdf
import de.itkl.tfidf.TfIdfPipeline import de.itkl.tfidf.TfIdfPipeline
import de.itkl.tui.tuiModule
import kotlinx.coroutines.runBlocking import kotlinx.coroutines.runBlocking
import org.koin.core.context.startKoin import org.koin.core.context.startKoin
import org.koin.dsl.module
class ComputeIdf : CliktCommand() { class ComputeIdf : CliktCommand() {
private val corpus by option(help = "corpus") private val corpus by option(help = "corpus")
@ -33,12 +31,9 @@ class ComputeIdf : CliktCommand() {
fun main(args: Array<String>) { fun main(args: Array<String>) {
startKoin { startKoin {
modules( modules(
coreApiModule,
textProcessingModule, textProcessingModule,
module { tuiModule)
single<ProgressBarFactory> {
TerminalProgressBarFactory()
}
})
ComputeIdf().main(args) ComputeIdf().main(args)
} }
} }

View File

@ -7,5 +7,5 @@ repositories {
} }
dependencies { dependencies {
implementation("org.jetbrains.kotlin:kotlin-gradle-plugin:1.8.20") implementation("org.jetbrains.kotlin:kotlin-gradle-plugin:$embeddedKotlinVersion")
} }

View File

@ -13,6 +13,11 @@ dependencies {
val koin_version = "3.5.3" val koin_version = "3.5.3"
implementation("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.7.3") implementation("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.7.3")
implementation("io.insert-koin:koin-core:$koin_version") implementation("io.insert-koin:koin-core:$koin_version")
implementation("org.jetbrains.kotlinx:kotlinx-datetime:0.5.0")
implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.6.2")
testImplementation("io.insert-koin:koin-test:$koin_version")
} }
java { java {

View File

@ -1,6 +0,0 @@
package de.itkl.clients
class MsOcr {
suspend fun ocr() {}
}

View File

@ -0,0 +1,11 @@
package de.itkl.core_api
import de.itkl.core_api.interfaces.NoopResourceReadDecorator
import de.itkl.core_api.interfaces.ResourceFactory
import de.itkl.core_api.interfaces.ResourceReadDecorator
import org.koin.dsl.module
/**
 * Koin module with the core-api default bindings:
 * a single [ResourceFactory] and a [NoopResourceReadDecorator] registered
 * as the [ResourceReadDecorator].
 */
val coreApiModule = module {
    single { ResourceFactory() }
    single<ResourceReadDecorator> { NoopResourceReadDecorator() }
}

View File

@ -0,0 +1,24 @@
package de.itkl.core_api.implementation
import de.itkl.core_api.interfaces.AbstractResource
import io.ktor.http.*
import java.io.File
import java.io.InputStream
import java.nio.file.Files
import java.nio.file.Path
import kotlin.io.path.name
/**
 * A resource backed by a file on the local file system.
 *
 * @property path location of the file; [filename] and [contentType] are derived from its name.
 */
class FileResource(override val path: Path) : AbstractResource() {
    /** Convenience constructor for callers that hold a [File] instead of a [Path]. */
    constructor(file: File) : this(file.toPath())

    /** Size of the file in bytes, looked up once on first access. */
    override val length: Long by lazy { path.toFile().length() }

    override val file: File?
        get() = path.toFile()

    override val filename: String
        get() = path.name

    /**
     * Content type guessed from the file name.
     *
     * `fromFilePath` yields an empty list when the extension is unknown or missing;
     * `first()` would then throw [NoSuchElementException], so fall back to
     * `application/octet-stream` instead.
     */
    override val contentType: ContentType
        get() = ContentType.fromFilePath(path.name).firstOrNull()
            ?: ContentType.Application.OctetStream

    /** Opens a fresh, undecorated input stream on the file. */
    override fun doRead(): InputStream = Files.newInputStream(path)
}

View File

@ -1,5 +1,6 @@
package de.itkl.fileprocessing package de.itkl.core_api.implementation
import de.itkl.core_api.interfaces.ProgressBar
import java.io.InputStream import java.io.InputStream
/** /**
@ -9,9 +10,10 @@ import java.io.InputStream
* @property updateOp The operation to be executed when the number of bytes read changes. * @property updateOp The operation to be executed when the number of bytes read changes.
* @property bytesRead The number of bytes read from the input stream. * @property bytesRead The number of bytes read from the input stream.
*/ */
class ProgressInputStream( internal class ProgressInputStream(
private val inputStream: InputStream, private val inputStream: InputStream,
private val progressBar: ProgressBar) : InputStream() { private val progressBar: ProgressBar
) : InputStream() {
@Volatile @Volatile
var bytesRead: Long = 0 var bytesRead: Long = 0
private set(value) { private set(value) {

View File

@ -0,0 +1,19 @@
package de.itkl.core_api.implementation
import de.itkl.core_api.implementation.ProgressInputStream
import de.itkl.core_api.interfaces.ProgressBarFactory
import de.itkl.core_api.interfaces.Resource
import java.io.InputStream
/**
 * Wraps another [Resource] so that every stream returned by [read] reports
 * its progress to a progress bar created by [progressBarFactory].
 * All other members are delegated to the wrapped resource unchanged.
 */
internal class ProgressResource(
    private val resource: Resource,
    private val progressBarFactory: ProgressBarFactory
) : Resource by resource {

    override fun read(): InputStream {
        // Open the underlying stream first, then create the bar for this resource.
        val source = resource.read()
        val progressBar = progressBarFactory.new(this)
        return ProgressInputStream(source, progressBar)
    }
}

View File

@ -1,7 +1,6 @@
package de.itkl.core_api.interfaces package de.itkl.core_api.interfaces
import java.io.File import java.io.File
import java.io.InputStream
import java.nio.file.Path import java.nio.file.Path
interface FileProcessor { interface FileProcessor {

View File

@ -1,4 +1,4 @@
package de.itkl.fileprocessing package de.itkl.core_api.interfaces
interface ProgressBarFactory { interface ProgressBarFactory {
fun new(resource: Resource): ProgressBar fun new(resource: Resource): ProgressBar

View File

@ -3,20 +3,32 @@ package de.itkl.core_api.interfaces
import io.ktor.http.* import io.ktor.http.*
import org.koin.core.component.KoinComponent import org.koin.core.component.KoinComponent
import org.koin.core.component.get import org.koin.core.component.get
import java.io.File
import java.io.InputStream import java.io.InputStream
import java.nio.file.Path
abstract class Resource : KoinComponent { interface Resource {
abstract val filename: String val filename: String
abstract val contentType: ContentType val contentType: ContentType
abstract val length: Long? // TODO: Find a better method to avoid those nulls. Maybe subtyping the interface
val length: Long?
val file: File?
val path: Path?
fun read(): InputStream
}
protected abstract fun doRead(): InputStream /**
fun read(): InputStream { * Automatically adds koin injectable decorators to reading/writing
* operations
*/
abstract class AbstractResource : Resource, KoinComponent {
abstract fun doRead(): InputStream
final override fun read(): InputStream {
return length?.let { length -> return length?.let { length ->
get<ResourceReadDecorator>().decorate( get<ResourceReadDecorator>().decorate(
length = length, length = length,
read() doRead()
) )
} ?: read() } ?: doRead()
} }
} }

View File

@ -0,0 +1,16 @@
package de.itkl.core_api.interfaces
import de.itkl.core_api.implementation.FileResource
import de.itkl.core_api.implementation.ProgressResource
import org.koin.core.component.KoinComponent
import org.koin.core.component.inject
import java.io.File
/**
 * Creates [Resource] instances. The [ProgressBarFactory] is resolved lazily
 * from Koin on first use.
 */
class ResourceFactory : KoinComponent {
    private val progressBarFactory: ProgressBarFactory by inject()

    /** Returns a [Resource] for [file], wrapped in a [ProgressResource]. */
    fun file(file: File): Resource =
        ProgressResource(FileResource(file), progressBarFactory)
}

View File

@ -0,0 +1,4 @@
package de.itkl.core_api.interfaces.data
/**
 * Tabular data that is iterated as lists of strings.
 */
interface DataTable : Iterable<List<String>> {

    // NOTE(review): presumably the column names (table header) — confirm with implementers.
    val columns: List<String>
}

View File

@ -1,6 +1,7 @@
package de.itkl.fileprocessing package de.itkl.fileprocessing
import de.itkl.core_api.interfaces.FileProcessor import de.itkl.core_api.interfaces.FileProcessor
import de.itkl.core_api.interfaces.ResourceFactory
import io.github.oshai.kotlinlogging.KotlinLogging import io.github.oshai.kotlinlogging.KotlinLogging
import org.koin.core.component.KoinComponent import org.koin.core.component.KoinComponent
import org.koin.core.component.inject import org.koin.core.component.inject
@ -10,10 +11,9 @@ import kotlin.io.path.exists
private val Log = KotlinLogging.logger { } private val Log = KotlinLogging.logger { }
abstract class FileProcessingPipeline(private val force: Boolean = false) : KoinComponent { abstract class FileProcessingPipeline(private val force: Boolean = false) : KoinComponent {
private val resourceFactory: ResourceFactory by inject()
protected abstract val fileProcessor: List<FileProcessor> protected abstract val fileProcessor: List<FileProcessor>
private val progressBarFactory: ProgressBarFactory by inject()
suspend fun input(file: File) { suspend fun input(file: File) {
var currentFile = file var currentFile = file
fileProcessor.forEach { processor -> fileProcessor.forEach { processor ->
@ -22,9 +22,8 @@ abstract class FileProcessingPipeline(private val force: Boolean = false) : Koin
Log.info { "$target exists. Skipping" } Log.info { "$target exists. Skipping" }
} else { } else {
Log.info { "$target does not exists. Creating" } Log.info { "$target does not exists. Creating" }
val resource = FileResource(currentFile) val resource = resourceFactory.file(currentFile)
val progress = ProgressResource(resource, progressBarFactory) processor.process(resource)
processor.process(progress)
Log.info { "File created: $target" } Log.info { "File created: $target" }
} }
currentFile = target.toFile() currentFile = target.toFile()

View File

@ -1,42 +0,0 @@
package de.itkl.fileprocessing
import java.io.File
import java.io.InputStream
import java.nio.file.Files
import java.nio.file.Path
import kotlin.io.path.name
interface Resource {
val path: Path
val size: Long
val filename: String
fun toFile(): File = path.toFile()
fun length() = path.toFile().length()
fun read(): InputStream
}
class ProgressResource(
private val resource: Resource,
private val progressBarFactory: ProgressBarFactory
) : Resource by resource
{
override fun read(): InputStream {
return ProgressInputStream(
resource.read(),
progressBarFactory.new(this)
)
}
}
class FileResource(override val path: Path) : Resource {
constructor(file: File): this(file.toPath())
override val size: Long by lazy { path.toFile().length() }
override val filename: String
get() = path.name
override fun read(): InputStream {
return Files.newInputStream(path)
}
}

View File

@ -0,0 +1,15 @@
// Build script of the HTTP client library module.
plugins {
// Kotlin serialization compiler plugin, pinned to the Kotlin version embedded in Gradle.
kotlin("plugin.serialization") version embeddedKotlinVersion
}
// Read from the Gradle project properties (presumably gradle.properties — confirm).
val ktorVersion: String by project
dependencies {
// `api`: exposed to consumers of this module as well.
api(project(":libraries:core-api"))
api("io.ktor:ktor-client-core:$ktorVersion")
api("io.ktor:ktor-client-core-jvm:$ktorVersion")
// `implementation`: used internally only (engine, content negotiation, JSON serialization).
implementation("io.ktor:ktor-client-cio:$ktorVersion")
implementation("io.ktor:ktor-client-content-negotiation:$ktorVersion")
implementation("io.ktor:ktor-serialization-kotlinx-json:$ktorVersion")
}

View File

@ -0,0 +1 @@
ktorVersion=2.3.7

View File

@ -0,0 +1,30 @@
package de.itkl.httpClient.clients
import de.itkl.core_api.interfaces.Resource
import io.github.oshai.kotlinlogging.KotlinLogging
import io.ktor.client.*
import io.ktor.client.call.*
import io.ktor.client.request.*
import io.ktor.client.statement.*
import io.ktor.http.*
import org.koin.core.component.KoinComponent
import org.koin.core.component.inject
private val Log = KotlinLogging.logger { }
/**
 * Client for the OCR "read" service (`/vision/v3.2/read/syncAnalyze`).
 *
 * The [HttpClient] is resolved lazily from Koin.
 *
 * @param endpoint full URL of the synchronous analyze endpoint. Defaults to the
 * address that was previously hard-coded, so `MsOcr()` keeps working.
 */
class MsOcr(
    private val endpoint: String = "http://10.54.150.152:5000/vision/v3.2/read/syncAnalyze"
) : KoinComponent {
    private val httpClient: HttpClient by inject()

    /**
     * Posts the content of [resource] to the OCR service and returns the parsed result.
     *
     * The request carries the resource's content type and the query parameters
     * `language=de` and `readingOrder=natural`.
     *
     * @param resource the document to analyze; its stream is used as the request body.
     * @return the response body deserialized into [MsOcrResponse].
     */
    suspend fun ocr(resource: Resource): MsOcrResponse {
        val response = httpClient.post {
            url(endpoint)
            // A bare `parameters { ... }` here resolves to the top-level
            // `io.ktor.http.parameters` builder, whose result is discarded, so the
            // query string stayed empty. `parameter` appends to the request URL.
            parameter("language", "de")
            parameter("readingOrder", "natural")
            contentType(resource.contentType)
            setBody(resource.read())
        }
        // Use the file's logger instead of printing to stdout.
        Log.info { "got response: ${response.status} in ${response.responseTime}" }
        return response.body()
    }
}

View File

@ -0,0 +1,81 @@
package de.itkl.httpClient.clients
import kotlinx.datetime.Instant
import kotlinx.datetime.LocalDateTime
import kotlinx.serialization.SerialName
import kotlinx.serialization.Serializable
/**
 * Serializable model of the JSON document returned by the OCR read call.
 * Trailing comments show sample values.
 *
 * NOTE(review): `angle`, `height`, `width` and the `boundingBox` entries are typed
 * as `Int`; confirm the service never returns fractional numbers for them.
 */
@Serializable
data class MsOcrResponse(
    @SerialName("analyzeResult") val analyzeResult: AnalyzeResult,
    @SerialName("createdDateTime") val createdDateTime: Instant, // 2023-12-29T21:02:30Z
    @SerialName("lastUpdatedDateTime") val lastUpdatedDateTime: Instant, // 2023-12-29T21:02:31Z
    @SerialName("status") val status: String // succeeded
) {
    /** Payload of the analysis: model/version information and the read results. */
    @Serializable
    data class AnalyzeResult(
        @SerialName("modelVersion") val modelVersion: String, // 2022-04-30
        @SerialName("readResults") val readResults: List<ReadResult>,
        @SerialName("version") val version: String // 3.2.0
    ) {
        /** One entry of `readResults`, carrying a page number, dimensions and lines. */
        @Serializable
        data class ReadResult(
            @SerialName("angle") val angle: Int, // 0
            @SerialName("height") val height: Int, // 3507
            @SerialName("lines") val lines: List<Line>,
            @SerialName("page") val page: Int, // 1
            @SerialName("unit") val unit: String, // pixel
            @SerialName("width") val width: Int // 2481
        ) {
            /** A recognized line of text together with its words. */
            @Serializable
            data class Line(
                @SerialName("appearance") val appearance: Appearance,
                @SerialName("boundingBox") val boundingBox: List<Int>,
                @SerialName("text") val text: String, // Franz Mustermann
                @SerialName("words") val words: List<Word>
            ) {
                /** Wrapper around the [Style] reported for a line. */
                @Serializable
                data class Appearance(
                    @SerialName("style") val style: Style
                ) {
                    /** Style name and the confidence reported for it. */
                    @Serializable
                    data class Style(
                        @SerialName("confidence") val confidence: Double, // 0.972
                        @SerialName("name") val name: String // other
                    )
                }

                /** A single recognized word with its confidence. */
                @Serializable
                data class Word(
                    @SerialName("boundingBox") val boundingBox: List<Int>,
                    @SerialName("confidence") val confidence: Double, // 0.998
                    @SerialName("text") val text: String // Franz
                )
            }
        }
    }
}

View File

@ -0,0 +1,14 @@
package de.itkl.httpClient
import io.ktor.client.*
import io.ktor.client.engine.cio.*
import io.ktor.client.plugins.contentnegotiation.*
import io.ktor.serialization.kotlinx.json.*
/**
 * Builds an [HttpClient] on the CIO engine with the [ContentNegotiation]
 * plugin installed and JSON registered using its default configuration.
 */
fun createHttpClient(): HttpClient = HttpClient(CIO) {
    install(ContentNegotiation) { json() }
}

View File

@ -0,0 +1,10 @@
package de.itkl.httpClient
import de.itkl.httpClient.clients.MsOcr
import io.ktor.client.*
import org.koin.dsl.module
/**
 * Koin module for HTTP access: one shared [HttpClient] built by
 * [createHttpClient] and one [MsOcr] client.
 */
val httpClientModule = module {
    single { createHttpClient() }
    single { MsOcr() }
}

View File

@ -0,0 +1,36 @@
package de.itkl.httpClient.clients
import de.itkl.core_api.coreApiModule
import de.itkl.core_api.implementation.FileResource
import de.itkl.core_api.interfaces.Resource
import de.itkl.httpClient.httpClientModule
import kotlinx.coroutines.runBlocking
import org.junit.Rule
import org.junit.jupiter.api.AfterEach
import org.junit.jupiter.api.BeforeEach
import org.junit.jupiter.api.Test
import org.koin.core.component.inject
import org.koin.core.context.startKoin
import org.koin.core.context.stopKoin
import org.koin.test.KoinTest
import java.nio.file.Paths
/**
 * Exercises [MsOcr] against the configured OCR endpoint.
 *
 * The test reads `../../assets/xs-reg/00001.jpg` relative to the working directory
 * and performs a real HTTP request, so it needs both the asset and the service.
 */
class MsOcrTest : KoinTest {
    /** Starts a fresh Koin context with the modules the client depends on. */
    @BeforeEach
    fun start() {
        startKoin {
            printLogger()
            modules(
                coreApiModule,
                httpClientModule
            )
        }
    }

    /**
     * Stops the Koin context again. Without this, the `startKoin` call of the
     * next test would fail because a global context is already running.
     */
    @AfterEach
    fun stop() {
        stopKoin()
    }

    @Test
    fun `can create a request`() = runBlocking {
        val msOcrClient: MsOcr by inject()
        val resource = FileResource(Paths.get("../../assets/xs-reg/00001.jpg").toAbsolutePath())
        val response = msOcrClient.ocr(resource)
        println(response)
        // Make the block evaluate to Unit, as the original does.
        Unit
    }
}

View File

@ -1,4 +1,5 @@
dependencies { dependencies {
api(project(":libraries:core-api"))
api("org.apache.lucene:lucene-analysis-common:9.9.0") api("org.apache.lucene:lucene-analysis-common:9.9.0")
implementation("com.github.doyaaaaaken:kotlin-csv-jvm:1.9.2") implementation("com.github.doyaaaaaken:kotlin-csv-jvm:1.9.2")
implementation("com.google.guava:guava:32.1.3-jre") implementation("com.google.guava:guava:32.1.3-jre")

View File

@ -2,30 +2,16 @@ package de.itkl.textprocessing
import kotlinx.coroutines.flow.* import kotlinx.coroutines.flow.*
class Histogram(private val histo: MutableMap<String,UInt> = mutableMapOf()) : Iterable<Pair<String, UInt>>{ class Histogram(
private val histo: MutableMap<String,UInt> = mutableMapOf()
) : Iterable<Pair<String, UInt>>{
companion object { companion object {
suspend fun from(flow: Flow<String>): Histogram {
return Histogram().apply {
flow.collect(this::add)
}
}
fun fromBagOfWords(bagOfWords: BagOfWords): Histogram { fun fromBagOfWords(bagOfWords: BagOfWords): Histogram {
val result = Histogram() val result = Histogram()
bagOfWords.forEach(result::add) bagOfWords.forEach(result::add)
return result return result
} }
suspend fun fromBagOfWords(flow: Flow<BagOfWords>): Histogram {
val result = Histogram()
flow.collect() { value ->
value.forEach(result::add)
}
return result
}
fun from(sequence: Sequence<Map<String, String>>): Histogram { fun from(sequence: Sequence<Map<String, String>>): Histogram {
val histo = sequence.associate { map -> map["word"]!! to map["count"]!!.toUInt() } val histo = sequence.associate { map -> map["word"]!! to map["count"]!!.toUInt() }
.toMutableMap() .toMutableMap()

View File

@ -2,6 +2,7 @@ package de.itkl.textprocessing
import com.github.doyaaaaaken.kotlincsv.dsl.csvReader import com.github.doyaaaaaken.kotlincsv.dsl.csvReader
import com.github.doyaaaaaken.kotlincsv.dsl.csvWriter import com.github.doyaaaaaken.kotlincsv.dsl.csvWriter
import de.itkl.core_api.interfaces.Resource
import java.io.File import java.io.File
import java.nio.file.Path import java.nio.file.Path
@ -16,9 +17,9 @@ class HistogramCsvStorage {
} }
} }
} }
suspend fun read(file: File): Histogram { suspend fun read(resource: Resource): Histogram {
return csvReader { } return csvReader { }
.openAsync(file) { .openAsync(resource.read()) {
val sequence = readAllWithHeaderAsSequence() val sequence = readAllWithHeaderAsSequence()
Histogram.from(sequence) Histogram.from(sequence)
} }

View File

@ -1,6 +1,7 @@
dependencies { dependencies {
api(project(":libraries:textprocessing")) api(project(":libraries:textprocessing"))
api(project(":libraries:fileprocessing")) api(project(":libraries:fileprocessing"))
api(project(":libraries:core-api"))
implementation("com.github.ajalt.mordant:mordant:2.2.0") implementation("com.github.ajalt.mordant:mordant:2.2.0")
implementation("com.github.doyaaaaaken:kotlin-csv-jvm:1.9.2") implementation("com.github.doyaaaaaken:kotlin-csv-jvm:1.9.2")
implementation("com.google.guava:guava:32.1.3-jre") implementation("com.google.guava:guava:32.1.3-jre")

View File

@ -1,7 +1,7 @@
package de.itkl.tfidf package de.itkl.tfidf
import de.itkl.fileprocessing.FileProcessor import de.itkl.core_api.interfaces.FileProcessor
import de.itkl.fileprocessing.Resource import de.itkl.core_api.interfaces.Resource
import de.itkl.processing.parallelUnordered import de.itkl.processing.parallelUnordered
import de.itkl.textprocessing.* import de.itkl.textprocessing.*
import de.itkl.textprocessing.interfaces.Stemmer import de.itkl.textprocessing.interfaces.Stemmer
@ -24,8 +24,8 @@ class DocumentFrequency : FileProcessor, KoinComponent {
} }
override suspend fun process(resource: Resource): File = coroutineScope { override suspend fun process(resource: Resource): File = coroutineScope {
Log.info { "Would produce: ${willProduce(resource.path)}" } Log.info { "Would produce: ${willProduce(resource.path!!)}" }
val resultFile = willProduce(resource.path).toFile() val resultFile = willProduce(resource.path!!).toFile()
val (numDocs, histogram) = TextFile(resource.read()) val (numDocs, histogram) = TextFile(resource.read())
.splitByEmptyLines() .splitByEmptyLines()
.withIndex() .withIndex()

View File

@ -1,43 +1,39 @@
package de.itkl.tfidf package de.itkl.tfidf
import com.github.doyaaaaaken.kotlincsv.dsl.csvWriter import com.github.doyaaaaaken.kotlincsv.dsl.csvWriter
import de.itkl.fileprocessing.FileProcessor import de.itkl.core_api.interfaces.FileProcessor
import de.itkl.fileprocessing.ProgressBarFactory import de.itkl.core_api.interfaces.Resource
import de.itkl.fileprocessing.Resource import de.itkl.core_api.interfaces.ProgressBarFactory
import de.itkl.textprocessing.HistogramCsvStorage import de.itkl.textprocessing.HistogramCsvStorage
import io.github.oshai.kotlinlogging.KotlinLogging
import org.koin.core.component.KoinComponent import org.koin.core.component.KoinComponent
import org.koin.core.component.inject import org.koin.core.component.inject
import java.io.File import java.io.File
import java.nio.file.Path import java.nio.file.Path
import kotlin.io.path.nameWithoutExtension import kotlin.io.path.nameWithoutExtension
import kotlin.math.ln
import kotlin.math.log
import kotlin.math.log10 import kotlin.math.log10
import kotlin.math.log2
private val Log = KotlinLogging.logger { }
class InverseDocumentFrequency : FileProcessor, KoinComponent { class InverseDocumentFrequency : FileProcessor, KoinComponent {
override fun willProduce(path: Path): Path { override fun willProduce(path: Path): Path {
return path.parent.resolve(path.nameWithoutExtension + "-inverse-document-frequency.csv") return path.parent.resolve(path.nameWithoutExtension + "-inverse-document-frequency.csv")
} }
override suspend fun process(resource: Resource): File { override suspend fun process(resource: Resource): File {
val histogram = HistogramCsvStorage().read(resource.toFile()) val histogram = HistogramCsvStorage().read(resource)
val numDocs = histogram val numDocs = histogram
.find { (word, count) -> word == "\$numDocs" }!! .find { (word, _) -> word == "\$numDocs" }!!
.second.toInt() .second.toInt()
val progressBarFactory: ProgressBarFactory by inject() val progressBarFactory: ProgressBarFactory by inject()
return progressBarFactory.new("compute idf", histogram.size.toLong()).use { progess -> return progressBarFactory.new("compute idf", histogram.size.toLong()).use { progress ->
csvWriter().openAsync(willProduce(resource.path).toFile(), append = false) { csvWriter().openAsync(willProduce(resource.path!!).toFile(), append = false) {
writeRow("word", "idf") writeRow("word", "idf")
histogram.forEach { (word, count) -> histogram.forEach { (word, count) ->
writeRow(word, idf(numDocs, count)) writeRow(word, idf(numDocs, count))
progess.step() progress.step()
} }
} }
resource.path.toFile() resource.path!!.toFile()
} }
} }

View File

@ -1,9 +1,7 @@
package de.itkl.tfidf package de.itkl.tfidf
import de.itkl.core_api.interfaces.FileProcessor
import de.itkl.fileprocessing.FileProcessingPipeline import de.itkl.fileprocessing.FileProcessingPipeline
import de.itkl.fileprocessing.FileProcessor
import de.itkl.fileprocessing.ProgressBarFactory
import org.koin.core.component.KoinComponent
class TfIdfPipeline(force: Boolean) : FileProcessingPipeline(force) { class TfIdfPipeline(force: Boolean) : FileProcessingPipeline(force) {
override val fileProcessor = listOf<FileProcessor>( override val fileProcessor = listOf<FileProcessor>(

View File

@ -0,0 +1,4 @@
// Dependencies of the tui library module.
dependencies {
// Exposed to consumers of this module as well.
api(project(":libraries:core-api"))
// Terminal rendering library, used internally only.
implementation("com.github.ajalt.mordant:mordant:2.2.0")
}

View File

@ -0,0 +1,4 @@
package de.itkl.tui.implementation
/** Placeholder class; it declares no members yet. */
class TerminalDataTableReporter

View File

@ -1,11 +1,11 @@
package de.itkl.tfidf package de.itkl.tui.implementation
import com.github.ajalt.mordant.animation.ProgressAnimation import com.github.ajalt.mordant.animation.ProgressAnimation
import com.github.ajalt.mordant.animation.progressAnimation import com.github.ajalt.mordant.animation.progressAnimation
import com.github.ajalt.mordant.terminal.Terminal import com.github.ajalt.mordant.terminal.Terminal
import de.itkl.fileprocessing.ProgressBar import de.itkl.core_api.interfaces.Resource
import de.itkl.fileprocessing.ProgressBarFactory import de.itkl.core_api.interfaces.ProgressBar
import de.itkl.fileprocessing.Resource import de.itkl.core_api.interfaces.ProgressBarFactory
class TerminalProgressBarFactory : ProgressBarFactory { class TerminalProgressBarFactory : ProgressBarFactory {
private val terminal = Terminal() private val terminal = Terminal()
@ -17,7 +17,7 @@ class TerminalProgressBarFactory : ProgressBarFactory {
completed() completed()
timeRemaining() timeRemaining()
} }
return TerminalProgressBar(animation, resource.length()) return TerminalProgressBar(animation, resource.length!!)
} }
override fun new(name: String, max: Long): ProgressBar { override fun new(name: String, max: Long): ProgressBar {

View File

@ -0,0 +1,14 @@
package de.itkl.tui
import de.itkl.core_api.interfaces.ProgressBarFactory
import de.itkl.tui.implementation.TerminalProgressBarFactory
import org.koin.dsl.module
/**
 * Koin module that adds terminal UI capabilities: it registers a single
 * [TerminalProgressBarFactory] as the [ProgressBarFactory].
 */
val tuiModule = module {
    single<ProgressBarFactory> { TerminalProgressBarFactory() }
}