From 6971e0022150f400f62c2e11b333b292ba8c004e Mon Sep 17 00:00:00 2001 From: Timo Bryant Date: Wed, 27 Dec 2023 15:57:41 +0100 Subject: [PATCH] adding core api --- .run/docthor [clean].run.xml | 36 +++++++++++++++++++ Writerside/topics/starter-topic.md | 3 ++ .../itkl/core_api/interfaces/FileProcessor.kt | 1 - .../de/itkl/core_api/interfaces/Resource.kt | 27 +++++++++----- .../de/itkl/fileprocessing/FileResource.kt | 24 +++++++++++++ .../itkl/fileprocessing/ProgressBarFactory.kt | 2 ++ .../kotlin/de/itkl/fileprocessing/Resource.kt | 25 ++----------- libraries/textprocessing/build.gradle.kts | 1 + .../textprocessing/HistogramCsvStorage.kt | 5 +-- libraries/tfidf/build.gradle.kts | 1 + .../kotlin/de/itkl/tfidf/DocumentFrequency.kt | 8 ++--- .../de/itkl/tfidf/InverseDocumentFrequency.kt | 22 +++++------- .../itkl/tfidf/TerminalProgressBarFactory.kt | 4 +-- .../kotlin/de/itkl/tfidf/TfIdfPipeline.kt | 4 +-- 14 files changed, 108 insertions(+), 55 deletions(-) create mode 100644 .run/docthor [clean].run.xml create mode 100644 libraries/fileprocessing/src/main/kotlin/de/itkl/fileprocessing/FileResource.kt diff --git a/.run/docthor [clean].run.xml b/.run/docthor [clean].run.xml new file mode 100644 index 0000000..3f1cca9 --- /dev/null +++ b/.run/docthor [clean].run.xml @@ -0,0 +1,36 @@ + + + + + + + true + true + + + + + false + false + + + \ No newline at end of file diff --git a/Writerside/topics/starter-topic.md b/Writerside/topics/starter-topic.md index 7c30b5d..58b23bb 100644 --- a/Writerside/topics/starter-topic.md +++ b/Writerside/topics/starter-topic.md @@ -21,4 +21,7 @@ All libraries should be placed unter libraries Abstraction about reading/writing to resources (filesystem, http, s3, etc pp) + +Defines the core interfaces + \ No newline at end of file diff --git a/libraries/core-api/src/main/kotlin/de/itkl/core_api/interfaces/FileProcessor.kt b/libraries/core-api/src/main/kotlin/de/itkl/core_api/interfaces/FileProcessor.kt index c5f907d..14c588a 100644 --- a/libraries/core-api/src/main/kotlin/de/itkl/core_api/interfaces/FileProcessor.kt +++ b/libraries/core-api/src/main/kotlin/de/itkl/core_api/interfaces/FileProcessor.kt @@ -1,7 +1,6 @@ package de.itkl.core_api.interfaces import java.io.File -import java.io.InputStream import java.nio.file.Path interface FileProcessor { diff --git a/libraries/core-api/src/main/kotlin/de/itkl/core_api/interfaces/Resource.kt b/libraries/core-api/src/main/kotlin/de/itkl/core_api/interfaces/Resource.kt index f6c437f..4e18b96 100644 --- a/libraries/core-api/src/main/kotlin/de/itkl/core_api/interfaces/Resource.kt +++ b/libraries/core-api/src/main/kotlin/de/itkl/core_api/interfaces/Resource.kt @@ -3,20 +3,31 @@ package de.itkl.core_api.interfaces import io.ktor.http.* import org.koin.core.component.KoinComponent import org.koin.core.component.get +import java.io.File import java.io.InputStream +import java.nio.file.Path -abstract class Resource : KoinComponent { - abstract val filename: String - abstract val contentType: ContentType - abstract val length: Long? +interface Resource : KoinComponent { + val filename: String + val contentType: ContentType + val length: Long? + val file: File? + val path: Path? + fun read(): InputStream +} - protected abstract fun doRead(): InputStream - fun read(): InputStream { +/** + * Automatically adds koin injectable decorators to reading/writing + * operations + */ +abstract class AbstractResource : Resource, KoinComponent { + abstract fun doRead(): InputStream + final override fun read(): InputStream { return length?.let { length -> get().decorate( length = length, - read() + doRead() ) - } ?: read() + } ?: doRead() } } \ No newline at end of file diff --git a/libraries/fileprocessing/src/main/kotlin/de/itkl/fileprocessing/FileResource.kt b/libraries/fileprocessing/src/main/kotlin/de/itkl/fileprocessing/FileResource.kt new file mode 100644 index 0000000..64bb013 --- /dev/null +++ b/libraries/fileprocessing/src/main/kotlin/de/itkl/fileprocessing/FileResource.kt @@ -0,0 +1,24 @@ +package de.itkl.fileprocessing + +import de.itkl.core_api.interfaces.AbstractResource +import io.ktor.http.* +import java.io.File +import java.io.InputStream +import java.nio.file.Files +import java.nio.file.Path +import kotlin.io.path.name + +class FileResource(override val path: Path) : AbstractResource() { + constructor(file: File): this(file.toPath()) + override val length: Long by lazy { path.toFile().length() } + override val file: File? + get() = path.toFile() + + override fun doRead(): InputStream { + return Files.newInputStream(path) + } + override val filename: String + get() = path.name + override val contentType: ContentType + get() = ContentType.fromFilePath(path.name).first() +} \ No newline at end of file diff --git a/libraries/fileprocessing/src/main/kotlin/de/itkl/fileprocessing/ProgressBarFactory.kt b/libraries/fileprocessing/src/main/kotlin/de/itkl/fileprocessing/ProgressBarFactory.kt index 3da49f9..4e0c6d7 100644 --- a/libraries/fileprocessing/src/main/kotlin/de/itkl/fileprocessing/ProgressBarFactory.kt +++ b/libraries/fileprocessing/src/main/kotlin/de/itkl/fileprocessing/ProgressBarFactory.kt @@ -1,5 +1,7 @@ package de.itkl.fileprocessing +import de.itkl.core_api.interfaces.Resource + interface ProgressBarFactory { fun new(resource: Resource): ProgressBar fun new(name: String, max: Long): ProgressBar diff --git a/libraries/fileprocessing/src/main/kotlin/de/itkl/fileprocessing/Resource.kt b/libraries/fileprocessing/src/main/kotlin/de/itkl/fileprocessing/Resource.kt index 3d179c5..ab8f79f 100644 --- a/libraries/fileprocessing/src/main/kotlin/de/itkl/fileprocessing/Resource.kt +++ b/libraries/fileprocessing/src/main/kotlin/de/itkl/fileprocessing/Resource.kt @@ -1,22 +1,14 @@ package de.itkl.fileprocessing +import de.itkl.core_api.interfaces.AbstractResource +import de.itkl.core_api.interfaces.Resource +import io.ktor.http.* import java.io.File import java.io.InputStream import java.nio.file.Files import java.nio.file.Path import kotlin.io.path.name -interface Resource { - val path: Path - val size: Long - val filename: String - fun toFile(): File = path.toFile() - - fun length() = path.toFile().length() - - fun read(): InputStream -} - class ProgressResource( private val resource: Resource, private val progressBarFactory: ProgressBarFactory @@ -29,14 +21,3 @@ class ProgressResource( ) } } - -class FileResource(override val path: Path) : Resource { - constructor(file: File): this(file.toPath()) - override val size: Long by lazy { path.toFile().length() } - override val filename: String - get() = path.name - - override fun read(): InputStream { - return Files.newInputStream(path) - } -} \ No newline at end of file diff --git a/libraries/textprocessing/build.gradle.kts b/libraries/textprocessing/build.gradle.kts index 6cf74f9..5ec3fd1 100644 --- a/libraries/textprocessing/build.gradle.kts +++ b/libraries/textprocessing/build.gradle.kts @@ -1,4 +1,5 @@ dependencies { + api(project(":libraries:core-api")) api("org.apache.lucene:lucene-analysis-common:9.9.0") implementation("com.github.doyaaaaaken:kotlin-csv-jvm:1.9.2") implementation("com.google.guava:guava:32.1.3-jre") diff --git a/libraries/textprocessing/src/main/kotlin/de/itkl/textprocessing/HistogramCsvStorage.kt b/libraries/textprocessing/src/main/kotlin/de/itkl/textprocessing/HistogramCsvStorage.kt index fca404f..58f4cbf 100644 --- a/libraries/textprocessing/src/main/kotlin/de/itkl/textprocessing/HistogramCsvStorage.kt +++ b/libraries/textprocessing/src/main/kotlin/de/itkl/textprocessing/HistogramCsvStorage.kt @@ -2,6 +2,7 @@ package de.itkl.textprocessing import com.github.doyaaaaaken.kotlincsv.dsl.csvReader import com.github.doyaaaaaken.kotlincsv.dsl.csvWriter +import de.itkl.core_api.interfaces.Resource import java.io.File import java.nio.file.Path @@ -16,9 +17,9 @@ class HistogramCsvStorage { } } } - suspend fun read(file: File): Histogram { + suspend fun read(resource: Resource): Histogram { return csvReader { } - .openAsync(file) { + .openAsync(resource.read()) { val sequence = readAllWithHeaderAsSequence() Histogram.from(sequence) } diff --git a/libraries/tfidf/build.gradle.kts b/libraries/tfidf/build.gradle.kts index 5da8a65..5899dc3 100644 --- a/libraries/tfidf/build.gradle.kts +++ b/libraries/tfidf/build.gradle.kts @@ -1,6 +1,7 @@ dependencies { api(project(":libraries:textprocessing")) api(project(":libraries:fileprocessing")) + api(project(":libraries:core-api")) implementation("com.github.ajalt.mordant:mordant:2.2.0") implementation("com.github.doyaaaaaken:kotlin-csv-jvm:1.9.2") implementation("com.google.guava:guava:32.1.3-jre") diff --git a/libraries/tfidf/src/main/kotlin/de/itkl/tfidf/DocumentFrequency.kt b/libraries/tfidf/src/main/kotlin/de/itkl/tfidf/DocumentFrequency.kt index 263c9cc..b0ba27f 100644 --- a/libraries/tfidf/src/main/kotlin/de/itkl/tfidf/DocumentFrequency.kt +++ b/libraries/tfidf/src/main/kotlin/de/itkl/tfidf/DocumentFrequency.kt @@ -1,7 +1,7 @@ package de.itkl.tfidf -import de.itkl.fileprocessing.FileProcessor -import de.itkl.fileprocessing.Resource +import de.itkl.core_api.interfaces.FileProcessor +import de.itkl.core_api.interfaces.Resource import de.itkl.processing.parallelUnordered import de.itkl.textprocessing.* import de.itkl.textprocessing.interfaces.Stemmer @@ -24,8 +24,8 @@ class DocumentFrequency : FileProcessor, KoinComponent { } override suspend fun process(resource: Resource): File = coroutineScope { - Log.info { "Would produce: ${willProduce(resource.path)}" } - val resultFile = willProduce(resource.path).toFile() + Log.info { "Would produce: ${willProduce(resource.path!!)}" } + val resultFile = willProduce(resource.path!!).toFile() val (numDocs, histogram) = TextFile(resource.read()) .splitByEmptyLines() .withIndex() diff --git a/libraries/tfidf/src/main/kotlin/de/itkl/tfidf/InverseDocumentFrequency.kt b/libraries/tfidf/src/main/kotlin/de/itkl/tfidf/InverseDocumentFrequency.kt index 1e71705..854edf8 100644 --- a/libraries/tfidf/src/main/kotlin/de/itkl/tfidf/InverseDocumentFrequency.kt +++ b/libraries/tfidf/src/main/kotlin/de/itkl/tfidf/InverseDocumentFrequency.kt @@ -1,43 +1,39 @@ package de.itkl.tfidf import com.github.doyaaaaaken.kotlincsv.dsl.csvWriter -import de.itkl.fileprocessing.FileProcessor +import de.itkl.core_api.interfaces.FileProcessor +import de.itkl.core_api.interfaces.Resource import de.itkl.fileprocessing.ProgressBarFactory -import de.itkl.fileprocessing.Resource import de.itkl.textprocessing.HistogramCsvStorage -import io.github.oshai.kotlinlogging.KotlinLogging import org.koin.core.component.KoinComponent import org.koin.core.component.inject import java.io.File import java.nio.file.Path import kotlin.io.path.nameWithoutExtension -import kotlin.math.ln -import kotlin.math.log import kotlin.math.log10 -import kotlin.math.log2 -private val Log = KotlinLogging.logger { } class InverseDocumentFrequency : FileProcessor, KoinComponent { override fun willProduce(path: Path): Path { return path.parent.resolve(path.nameWithoutExtension + "-inverse-document-frequency.csv") } + override suspend fun process(resource: Resource): File { - val histogram = HistogramCsvStorage().read(resource.toFile()) + val histogram = HistogramCsvStorage().read(resource) val numDocs = histogram - .find { (word, count) -> word == "\$numDocs" }!! + .find { (word, _) -> word == "\$numDocs" }!! .second.toInt() val progressBarFactory: ProgressBarFactory by inject() - return progressBarFactory.new("compute idf", histogram.size.toLong()).use { progess -> - csvWriter().openAsync(willProduce(resource.path).toFile(), append = false) { + return progressBarFactory.new("compute idf", histogram.size.toLong()).use { progress -> + csvWriter().openAsync(willProduce(resource.path!!).toFile(), append = false) { writeRow("word", "idf") histogram.forEach { (word, count) -> writeRow(word, idf(numDocs, count)) - progess.step() + progress.step() } } - resource.path.toFile() + resource.path!!.toFile() } } diff --git a/libraries/tfidf/src/main/kotlin/de/itkl/tfidf/TerminalProgressBarFactory.kt b/libraries/tfidf/src/main/kotlin/de/itkl/tfidf/TerminalProgressBarFactory.kt index 0f63f46..8d31d90 100644 --- a/libraries/tfidf/src/main/kotlin/de/itkl/tfidf/TerminalProgressBarFactory.kt +++ b/libraries/tfidf/src/main/kotlin/de/itkl/tfidf/TerminalProgressBarFactory.kt @@ -3,9 +3,9 @@ package de.itkl.tfidf import com.github.ajalt.mordant.animation.ProgressAnimation import com.github.ajalt.mordant.animation.progressAnimation import com.github.ajalt.mordant.terminal.Terminal +import de.itkl.core_api.interfaces.Resource import de.itkl.fileprocessing.ProgressBar import de.itkl.fileprocessing.ProgressBarFactory -import de.itkl.fileprocessing.Resource class TerminalProgressBarFactory : ProgressBarFactory { private val terminal = Terminal() @@ -17,7 +17,7 @@ class TerminalProgressBarFactory : ProgressBarFactory { completed() timeRemaining() } - return TerminalProgressBar(animation, resource.length()) + return TerminalProgressBar(animation, resource.length!!) } override fun new(name: String, max: Long): ProgressBar { diff --git a/libraries/tfidf/src/main/kotlin/de/itkl/tfidf/TfIdfPipeline.kt b/libraries/tfidf/src/main/kotlin/de/itkl/tfidf/TfIdfPipeline.kt index 2d4ff73..a22eecd 100644 --- a/libraries/tfidf/src/main/kotlin/de/itkl/tfidf/TfIdfPipeline.kt +++ b/libraries/tfidf/src/main/kotlin/de/itkl/tfidf/TfIdfPipeline.kt @@ -1,9 +1,7 @@ package de.itkl.tfidf +import de.itkl.core_api.interfaces.FileProcessor import de.itkl.fileprocessing.FileProcessingPipeline -import de.itkl.fileprocessing.FileProcessor -import de.itkl.fileprocessing.ProgressBarFactory -import org.koin.core.component.KoinComponent class TfIdfPipeline(force: Boolean) : FileProcessingPipeline(force) { override val fileProcessor = listOf(