diff --git a/.run/docthor [clean].run.xml b/.run/docthor [clean].run.xml
new file mode 100644
index 0000000..3f1cca9
--- /dev/null
+++ b/.run/docthor [clean].run.xml
@@ -0,0 +1,36 @@
+
+
+
+
+
+
+
+
+
+
+
+ true
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+ false
+ false
+
+
+
\ No newline at end of file
diff --git a/Writerside/topics/starter-topic.md b/Writerside/topics/starter-topic.md
index 7c30b5d..58b23bb 100644
--- a/Writerside/topics/starter-topic.md
+++ b/Writerside/topics/starter-topic.md
@@ -21,4 +21,7 @@ All libraries should be placed unter libraries
Abstraction about reading/writing to resources (filesystem, http, s3, etc pp)
+
+Defines the core interfaces
+
\ No newline at end of file
diff --git a/libraries/core-api/src/main/kotlin/de/itkl/core_api/interfaces/FileProcessor.kt b/libraries/core-api/src/main/kotlin/de/itkl/core_api/interfaces/FileProcessor.kt
index c5f907d..14c588a 100644
--- a/libraries/core-api/src/main/kotlin/de/itkl/core_api/interfaces/FileProcessor.kt
+++ b/libraries/core-api/src/main/kotlin/de/itkl/core_api/interfaces/FileProcessor.kt
@@ -1,7 +1,6 @@
package de.itkl.core_api.interfaces
import java.io.File
-import java.io.InputStream
import java.nio.file.Path
interface FileProcessor {
diff --git a/libraries/core-api/src/main/kotlin/de/itkl/core_api/interfaces/Resource.kt b/libraries/core-api/src/main/kotlin/de/itkl/core_api/interfaces/Resource.kt
index f6c437f..4e18b96 100644
--- a/libraries/core-api/src/main/kotlin/de/itkl/core_api/interfaces/Resource.kt
+++ b/libraries/core-api/src/main/kotlin/de/itkl/core_api/interfaces/Resource.kt
@@ -3,20 +3,31 @@ package de.itkl.core_api.interfaces
import io.ktor.http.*
import org.koin.core.component.KoinComponent
import org.koin.core.component.get
+import java.io.File
import java.io.InputStream
+import java.nio.file.Path
-abstract class Resource : KoinComponent {
- abstract val filename: String
- abstract val contentType: ContentType
- abstract val length: Long?
+interface Resource : KoinComponent {
+ val filename: String
+ val contentType: ContentType
+ val length: Long? // nullable; AbstractResource.read() skips decoration when this is null
+ val file: File? // nullable — presumably null for resources not backed by a local file; confirm
+ val path: Path? // nullable — presumably null for resources without a filesystem path; confirm
+ fun read(): InputStream // returns the resource content as a stream
+}
- protected abstract fun doRead(): InputStream
- fun read(): InputStream {
+/**
+ * Base [Resource] whose [read] wraps the stream from [doRead] in a decorator
+ * obtained from Koin whenever [length] is known; otherwise it returns [doRead] as is.
+ */
+abstract class AbstractResource : Resource, KoinComponent {
+ abstract fun doRead(): InputStream // supplies the raw, undecorated stream
+ final override fun read(): InputStream {
return length?.let { length ->
get().decorate(
length = length,
- read()
+ doRead()
)
- } ?: read()
+ } ?: doRead() // length is null: no decoration is applied
}
}
\ No newline at end of file
diff --git a/libraries/fileprocessing/src/main/kotlin/de/itkl/fileprocessing/FileResource.kt b/libraries/fileprocessing/src/main/kotlin/de/itkl/fileprocessing/FileResource.kt
new file mode 100644
index 0000000..64bb013
--- /dev/null
+++ b/libraries/fileprocessing/src/main/kotlin/de/itkl/fileprocessing/FileResource.kt
@@ -0,0 +1,24 @@
+package de.itkl.fileprocessing
+
+import de.itkl.core_api.interfaces.AbstractResource
+import io.ktor.http.*
+import java.io.File
+import java.io.InputStream
+import java.nio.file.Files
+import java.nio.file.Path
+import kotlin.io.path.name
+
+/** [AbstractResource] backed by the local file at [path]. */
+class FileResource(override val path: Path) : AbstractResource() {
+    constructor(file: File) : this(file.toPath())
+    /** File size in bytes, read once on first access and cached by `lazy`. */
+    override val length: Long by lazy { path.toFile().length() }
+    override val file: File? get() = path.toFile()
+    override val filename: String get() = path.name
+
+    /** First type ktor maps to the file name, or `application/octet-stream` when it maps none. */
+    override val contentType: ContentType
+        get() = ContentType.fromFilePath(path.name).firstOrNull() ?: ContentType.Application.OctetStream
+    /** Opens a fresh stream on [path]; [read] applies the decorators on top of it. */
+    override fun doRead(): InputStream = Files.newInputStream(path)
+}
\ No newline at end of file
diff --git a/libraries/fileprocessing/src/main/kotlin/de/itkl/fileprocessing/ProgressBarFactory.kt b/libraries/fileprocessing/src/main/kotlin/de/itkl/fileprocessing/ProgressBarFactory.kt
index 3da49f9..4e0c6d7 100644
--- a/libraries/fileprocessing/src/main/kotlin/de/itkl/fileprocessing/ProgressBarFactory.kt
+++ b/libraries/fileprocessing/src/main/kotlin/de/itkl/fileprocessing/ProgressBarFactory.kt
@@ -1,5 +1,7 @@
package de.itkl.fileprocessing
+import de.itkl.core_api.interfaces.Resource
+
interface ProgressBarFactory {
fun new(resource: Resource): ProgressBar
fun new(name: String, max: Long): ProgressBar
diff --git a/libraries/fileprocessing/src/main/kotlin/de/itkl/fileprocessing/Resource.kt b/libraries/fileprocessing/src/main/kotlin/de/itkl/fileprocessing/Resource.kt
index 3d179c5..ab8f79f 100644
--- a/libraries/fileprocessing/src/main/kotlin/de/itkl/fileprocessing/Resource.kt
+++ b/libraries/fileprocessing/src/main/kotlin/de/itkl/fileprocessing/Resource.kt
@@ -1,22 +1,14 @@
package de.itkl.fileprocessing
+import de.itkl.core_api.interfaces.AbstractResource
+import de.itkl.core_api.interfaces.Resource
+import io.ktor.http.*
import java.io.File
import java.io.InputStream
import java.nio.file.Files
import java.nio.file.Path
import kotlin.io.path.name
-interface Resource {
- val path: Path
- val size: Long
- val filename: String
- fun toFile(): File = path.toFile()
-
- fun length() = path.toFile().length()
-
- fun read(): InputStream
-}
-
class ProgressResource(
private val resource: Resource,
private val progressBarFactory: ProgressBarFactory
@@ -29,14 +21,3 @@ class ProgressResource(
)
}
}
-
-class FileResource(override val path: Path) : Resource {
- constructor(file: File): this(file.toPath())
- override val size: Long by lazy { path.toFile().length() }
- override val filename: String
- get() = path.name
-
- override fun read(): InputStream {
- return Files.newInputStream(path)
- }
-}
\ No newline at end of file
diff --git a/libraries/textprocessing/build.gradle.kts b/libraries/textprocessing/build.gradle.kts
index 6cf74f9..5ec3fd1 100644
--- a/libraries/textprocessing/build.gradle.kts
+++ b/libraries/textprocessing/build.gradle.kts
@@ -1,4 +1,5 @@
dependencies {
+ api(project(":libraries:core-api"))
api("org.apache.lucene:lucene-analysis-common:9.9.0")
implementation("com.github.doyaaaaaken:kotlin-csv-jvm:1.9.2")
implementation("com.google.guava:guava:32.1.3-jre")
diff --git a/libraries/textprocessing/src/main/kotlin/de/itkl/textprocessing/HistogramCsvStorage.kt b/libraries/textprocessing/src/main/kotlin/de/itkl/textprocessing/HistogramCsvStorage.kt
index fca404f..58f4cbf 100644
--- a/libraries/textprocessing/src/main/kotlin/de/itkl/textprocessing/HistogramCsvStorage.kt
+++ b/libraries/textprocessing/src/main/kotlin/de/itkl/textprocessing/HistogramCsvStorage.kt
@@ -2,6 +2,7 @@ package de.itkl.textprocessing
import com.github.doyaaaaaken.kotlincsv.dsl.csvReader
import com.github.doyaaaaaken.kotlincsv.dsl.csvWriter
+import de.itkl.core_api.interfaces.Resource
import java.io.File
import java.nio.file.Path
@@ -16,9 +17,9 @@ class HistogramCsvStorage {
}
}
}
- suspend fun read(file: File): Histogram {
+ suspend fun read(resource: Resource): Histogram {
return csvReader { }
- .openAsync(file) {
+ .openAsync(resource.read()) {
val sequence = readAllWithHeaderAsSequence()
Histogram.from(sequence)
}
diff --git a/libraries/tfidf/build.gradle.kts b/libraries/tfidf/build.gradle.kts
index 5da8a65..5899dc3 100644
--- a/libraries/tfidf/build.gradle.kts
+++ b/libraries/tfidf/build.gradle.kts
@@ -1,6 +1,7 @@
dependencies {
api(project(":libraries:textprocessing"))
api(project(":libraries:fileprocessing"))
+ api(project(":libraries:core-api"))
implementation("com.github.ajalt.mordant:mordant:2.2.0")
implementation("com.github.doyaaaaaken:kotlin-csv-jvm:1.9.2")
implementation("com.google.guava:guava:32.1.3-jre")
diff --git a/libraries/tfidf/src/main/kotlin/de/itkl/tfidf/DocumentFrequency.kt b/libraries/tfidf/src/main/kotlin/de/itkl/tfidf/DocumentFrequency.kt
index 263c9cc..b0ba27f 100644
--- a/libraries/tfidf/src/main/kotlin/de/itkl/tfidf/DocumentFrequency.kt
+++ b/libraries/tfidf/src/main/kotlin/de/itkl/tfidf/DocumentFrequency.kt
@@ -1,7 +1,7 @@
package de.itkl.tfidf
-import de.itkl.fileprocessing.FileProcessor
-import de.itkl.fileprocessing.Resource
+import de.itkl.core_api.interfaces.FileProcessor
+import de.itkl.core_api.interfaces.Resource
import de.itkl.processing.parallelUnordered
import de.itkl.textprocessing.*
import de.itkl.textprocessing.interfaces.Stemmer
@@ -24,8 +24,8 @@ class DocumentFrequency : FileProcessor, KoinComponent {
}
override suspend fun process(resource: Resource): File = coroutineScope {
- Log.info { "Would produce: ${willProduce(resource.path)}" }
- val resultFile = willProduce(resource.path).toFile()
+ Log.info { "Would produce: ${willProduce(resource.path!!)}" }
+ val resultFile = willProduce(resource.path!!).toFile()
val (numDocs, histogram) = TextFile(resource.read())
.splitByEmptyLines()
.withIndex()
diff --git a/libraries/tfidf/src/main/kotlin/de/itkl/tfidf/InverseDocumentFrequency.kt b/libraries/tfidf/src/main/kotlin/de/itkl/tfidf/InverseDocumentFrequency.kt
index 1e71705..854edf8 100644
--- a/libraries/tfidf/src/main/kotlin/de/itkl/tfidf/InverseDocumentFrequency.kt
+++ b/libraries/tfidf/src/main/kotlin/de/itkl/tfidf/InverseDocumentFrequency.kt
@@ -1,43 +1,39 @@
package de.itkl.tfidf
import com.github.doyaaaaaken.kotlincsv.dsl.csvWriter
-import de.itkl.fileprocessing.FileProcessor
+import de.itkl.core_api.interfaces.FileProcessor
+import de.itkl.core_api.interfaces.Resource
import de.itkl.fileprocessing.ProgressBarFactory
-import de.itkl.fileprocessing.Resource
import de.itkl.textprocessing.HistogramCsvStorage
-import io.github.oshai.kotlinlogging.KotlinLogging
import org.koin.core.component.KoinComponent
import org.koin.core.component.inject
import java.io.File
import java.nio.file.Path
import kotlin.io.path.nameWithoutExtension
-import kotlin.math.ln
-import kotlin.math.log
import kotlin.math.log10
-import kotlin.math.log2
-private val Log = KotlinLogging.logger { }
class InverseDocumentFrequency : FileProcessor, KoinComponent {
override fun willProduce(path: Path): Path {
return path.parent.resolve(path.nameWithoutExtension + "-inverse-document-frequency.csv")
}
+
override suspend fun process(resource: Resource): File {
- val histogram = HistogramCsvStorage().read(resource.toFile())
+ val histogram = HistogramCsvStorage().read(resource)
val numDocs = histogram
- .find { (word, count) -> word == "\$numDocs" }!!
+ .find { (word, _) -> word == "\$numDocs" }!! // the "$numDocs" row supplies numDocs; !! throws if it is absent
.second.toInt()
val progressBarFactory: ProgressBarFactory by inject()
- return progressBarFactory.new("compute idf", histogram.size.toLong()).use { progess ->
- csvWriter().openAsync(willProduce(resource.path).toFile(), append = false) {
+ return progressBarFactory.new("compute idf", histogram.size.toLong()).use { progress ->
+ csvWriter().openAsync(willProduce(resource.path!!).toFile(), append = false) { // !! requires a path-backed resource (Resource.path is nullable)
writeRow("word", "idf")
histogram.forEach { (word, count) ->
writeRow(word, idf(numDocs, count))
- progess.step()
+ progress.step()
}
}
- resource.path.toFile()
+ resource.path!!.toFile() // NOTE(review): returns the input file, not the CSV written to willProduce(...) — confirm intended
}
}
diff --git a/libraries/tfidf/src/main/kotlin/de/itkl/tfidf/TerminalProgressBarFactory.kt b/libraries/tfidf/src/main/kotlin/de/itkl/tfidf/TerminalProgressBarFactory.kt
index 0f63f46..8d31d90 100644
--- a/libraries/tfidf/src/main/kotlin/de/itkl/tfidf/TerminalProgressBarFactory.kt
+++ b/libraries/tfidf/src/main/kotlin/de/itkl/tfidf/TerminalProgressBarFactory.kt
@@ -3,9 +3,9 @@ package de.itkl.tfidf
import com.github.ajalt.mordant.animation.ProgressAnimation
import com.github.ajalt.mordant.animation.progressAnimation
import com.github.ajalt.mordant.terminal.Terminal
+import de.itkl.core_api.interfaces.Resource
import de.itkl.fileprocessing.ProgressBar
import de.itkl.fileprocessing.ProgressBarFactory
-import de.itkl.fileprocessing.Resource
class TerminalProgressBarFactory : ProgressBarFactory {
private val terminal = Terminal()
@@ -17,7 +17,7 @@ class TerminalProgressBarFactory : ProgressBarFactory {
completed()
timeRemaining()
}
- return TerminalProgressBar(animation, resource.length())
+ return TerminalProgressBar(animation, resource.length!!)
}
override fun new(name: String, max: Long): ProgressBar {
diff --git a/libraries/tfidf/src/main/kotlin/de/itkl/tfidf/TfIdfPipeline.kt b/libraries/tfidf/src/main/kotlin/de/itkl/tfidf/TfIdfPipeline.kt
index 2d4ff73..a22eecd 100644
--- a/libraries/tfidf/src/main/kotlin/de/itkl/tfidf/TfIdfPipeline.kt
+++ b/libraries/tfidf/src/main/kotlin/de/itkl/tfidf/TfIdfPipeline.kt
@@ -1,9 +1,7 @@
package de.itkl.tfidf
+import de.itkl.core_api.interfaces.FileProcessor
import de.itkl.fileprocessing.FileProcessingPipeline
-import de.itkl.fileprocessing.FileProcessor
-import de.itkl.fileprocessing.ProgressBarFactory
-import org.koin.core.component.KoinComponent
class TfIdfPipeline(force: Boolean) : FileProcessingPipeline(force) {
override val fileProcessor = listOf(