Compare commits
10 Commits
71e066fcde
...
78af3f0d50
| Author | SHA1 | Date |
|---|---|---|
|
|
78af3f0d50 | |
|
|
d973262dbd | |
|
|
1ef987f611 | |
|
|
c40ab54012 | |
|
|
3e5534f184 | |
|
|
81a30dd2f6 | |
|
|
606837a76f | |
|
|
46f1c49ab1 | |
|
|
4cafac4583 | |
|
|
13110fa8e5 |
|
|
@ -6,14 +6,17 @@ import com.github.ajalt.clikt.parameters.options.option
|
||||||
import com.github.ajalt.clikt.parameters.options.required
|
import com.github.ajalt.clikt.parameters.options.required
|
||||||
import com.github.ajalt.clikt.parameters.types.enum
|
import com.github.ajalt.clikt.parameters.types.enum
|
||||||
import com.github.ajalt.clikt.parameters.types.file
|
import com.github.ajalt.clikt.parameters.types.file
|
||||||
import de.itkl.textprocessing.TextFile
|
import de.itkl.fileprocessing.ProgressBarFactory
|
||||||
|
import de.itkl.textprocessing.textProcessingModule
|
||||||
import de.itkl.tfidf.Language
|
import de.itkl.tfidf.Language
|
||||||
|
import de.itkl.tfidf.TerminalProgressBarFactory
|
||||||
//import de.itkl.tfidf.TfIdf
|
//import de.itkl.tfidf.TfIdf
|
||||||
import de.itkl.tfidf.TfIdfPipeline
|
import de.itkl.tfidf.TfIdfPipeline
|
||||||
import kotlinx.coroutines.flow.take
|
|
||||||
import kotlinx.coroutines.runBlocking
|
import kotlinx.coroutines.runBlocking
|
||||||
|
import org.koin.core.context.startKoin
|
||||||
|
import org.koin.dsl.module
|
||||||
|
|
||||||
class ComputeTf : CliktCommand() {
|
class ComputeIdf : CliktCommand() {
|
||||||
private val corpus by option(help = "corpus")
|
private val corpus by option(help = "corpus")
|
||||||
.file()
|
.file()
|
||||||
.required()
|
.required()
|
||||||
|
|
@ -22,18 +25,20 @@ class ComputeTf : CliktCommand() {
|
||||||
.required()
|
.required()
|
||||||
|
|
||||||
override fun run() = runBlocking {
|
override fun run() = runBlocking {
|
||||||
TfIdfPipeline(language = Language.DE)
|
TfIdfPipeline(force = true)
|
||||||
.input(corpus)
|
.input(corpus)
|
||||||
// TextFile(corpus).splitByEmptyLines()
|
|
||||||
// .take(10)
|
|
||||||
// .collect { println(it) }
|
|
||||||
// val tfIdf = TfIdf()
|
|
||||||
// val histogram = tfIdf.computeTf(
|
|
||||||
// corpus,
|
|
||||||
// language
|
|
||||||
// )
|
|
||||||
// val tf = tfIdf.normalizeTf(histogram, corpus.toPath().parent.resolve("${corpus.nameWithoutExtension}-tf.csv").toFile())
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fun main(args: Array<String>) = ComputeTf().main(args)
|
fun main(args: Array<String>) {
|
||||||
|
startKoin {
|
||||||
|
modules(
|
||||||
|
textProcessingModule,
|
||||||
|
module {
|
||||||
|
single<ProgressBarFactory> {
|
||||||
|
TerminalProgressBarFactory()
|
||||||
|
}
|
||||||
|
})
|
||||||
|
ComputeIdf().main(args)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,19 +0,0 @@
|
||||||
/*
|
|
||||||
* This file was generated by the Gradle 'init' task.
|
|
||||||
*
|
|
||||||
* This project uses @Incubating APIs which are subject to change.
|
|
||||||
*/
|
|
||||||
|
|
||||||
plugins {
|
|
||||||
// Support convention plugins written in Kotlin. Convention plugins are build scripts in 'src/main' that automatically become available as plugins in the main build.
|
|
||||||
`kotlin-dsl`
|
|
||||||
}
|
|
||||||
|
|
||||||
repositories {
|
|
||||||
// Use the plugin portal to apply community plugins in convention plugins.
|
|
||||||
gradlePluginPortal()
|
|
||||||
}
|
|
||||||
|
|
||||||
dependencies {
|
|
||||||
implementation("org.jetbrains.kotlin:kotlin-gradle-plugin:1.8.20")
|
|
||||||
}
|
|
||||||
|
|
@ -1,8 +0,0 @@
|
||||||
/*
|
|
||||||
* This file was generated by the Gradle 'init' task.
|
|
||||||
*
|
|
||||||
* This settings file is used to specify which projects to include in your build-logic build.
|
|
||||||
* This project uses @Incubating APIs which are subject to change.
|
|
||||||
*/
|
|
||||||
|
|
||||||
rootProject.name = "docthor-build-logic"
|
|
||||||
|
|
@ -1,22 +0,0 @@
|
||||||
import org.codehaus.groovy.tools.shell.util.Logger.io
|
|
||||||
|
|
||||||
/*
|
|
||||||
* This file was generated by the Gradle 'init' task.
|
|
||||||
*
|
|
||||||
* This project uses @Incubating APIs which are subject to change.
|
|
||||||
*/
|
|
||||||
|
|
||||||
plugins {
|
|
||||||
// Apply the common convention plugin for shared build configuration between library and application projects.
|
|
||||||
id("docthor.kotlin-common-conventions")
|
|
||||||
|
|
||||||
// Apply the java-library plugin for API and implementation separation.
|
|
||||||
`java-library`
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
dependencies {
|
|
||||||
api("io.github.oshai:kotlin-logging-jvm:5.1.0")
|
|
||||||
|
|
||||||
implementation("org.slf4j:slf4j-api:2.0.9")
|
|
||||||
}
|
|
||||||
|
|
@ -0,0 +1,3 @@
|
||||||
|
project(":libraries").subprojects {
|
||||||
|
apply(plugin = "docthor.kotlin-library-conventions")
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,11 @@
|
||||||
|
plugins {
|
||||||
|
`kotlin-dsl`
|
||||||
|
}
|
||||||
|
|
||||||
|
repositories {
|
||||||
|
gradlePluginPortal()
|
||||||
|
}
|
||||||
|
|
||||||
|
dependencies {
|
||||||
|
implementation("org.jetbrains.kotlin:kotlin-gradle-plugin:1.8.20")
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
rootProject.name = "docthor-build-logic"
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
import org.gradle.api.plugins.jvm.JvmTestSuite
|
import org.gradle.api.plugins.jvm.JvmTestSuite
|
||||||
|
import org.jetbrains.kotlin.gradle.dsl.JvmTarget
|
||||||
|
|
||||||
plugins {
|
plugins {
|
||||||
id("org.jetbrains.kotlin.jvm")
|
id("org.jetbrains.kotlin.jvm")
|
||||||
|
|
@ -9,7 +10,23 @@ repositories {
|
||||||
}
|
}
|
||||||
|
|
||||||
dependencies {
|
dependencies {
|
||||||
|
val koin_version = "3.5.3"
|
||||||
implementation("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.7.3")
|
implementation("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.7.3")
|
||||||
|
implementation("io.insert-koin:koin-core:$koin_version")
|
||||||
|
}
|
||||||
|
|
||||||
|
java {
|
||||||
|
toolchain {
|
||||||
|
languageVersion.set(JavaLanguageVersion.of("19"))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
tasks
|
||||||
|
.withType<org.jetbrains.kotlin.gradle.tasks.KotlinJvmCompile>()
|
||||||
|
.configureEach {
|
||||||
|
compilerOptions {
|
||||||
|
jvmTarget.set(JvmTarget.JVM_19)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
testing {
|
testing {
|
||||||
|
|
@ -0,0 +1,10 @@
|
||||||
|
plugins {
|
||||||
|
id("docthor.kotlin-common-conventions")
|
||||||
|
`java-library`
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
dependencies {
|
||||||
|
api("io.github.oshai:kotlin-logging-jvm:5.1.0")
|
||||||
|
implementation("org.slf4j:slf4j-api:2.0.9")
|
||||||
|
}
|
||||||
|
|
@ -1,6 +1,3 @@
|
||||||
# This file was generated by the Gradle 'init' task.
|
|
||||||
# https://docs.gradle.org/current/userguide/build_environment.html#sec:gradle_configuration_properties
|
|
||||||
|
|
||||||
org.gradle.parallel=true
|
org.gradle.parallel=true
|
||||||
org.gradle.caching=true
|
org.gradle.caching=true
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
distributionBase=GRADLE_USER_HOME
|
distributionBase=GRADLE_USER_HOME
|
||||||
distributionPath=wrapper/dists
|
distributionPath=wrapper/dists
|
||||||
distributionUrl=https\://services.gradle.org/distributions/gradle-8.2.1-bin.zip
|
distributionUrl=https\://services.gradle.org/distributions/gradle-8.5-bin.zip
|
||||||
networkTimeout=10000
|
networkTimeout=10000
|
||||||
validateDistributionUrl=true
|
validateDistributionUrl=true
|
||||||
zipStoreBase=GRADLE_USER_HOME
|
zipStoreBase=GRADLE_USER_HOME
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,3 @@
|
||||||
plugins {
|
|
||||||
id("docthor.kotlin-library-conventions")
|
|
||||||
}
|
|
||||||
|
|
||||||
dependencies {
|
dependencies {
|
||||||
implementation("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.7.3")
|
implementation("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.7.3")
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,26 +1,31 @@
|
||||||
package de.itkl.fileprocessing
|
package de.itkl.fileprocessing
|
||||||
|
|
||||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||||
|
import org.koin.core.annotation.KoinReflectAPI
|
||||||
|
import org.koin.core.component.KoinComponent
|
||||||
|
import org.koin.core.component.inject
|
||||||
import java.io.File
|
import java.io.File
|
||||||
import kotlin.io.path.exists
|
import kotlin.io.path.exists
|
||||||
|
|
||||||
private val Log = KotlinLogging.logger { }
|
private val Log = KotlinLogging.logger { }
|
||||||
|
|
||||||
abstract class FileProcessingPipeline {
|
abstract class FileProcessingPipeline(private val force: Boolean = false) : KoinComponent {
|
||||||
|
|
||||||
|
|
||||||
protected abstract val fileProcessor: List<FileProcessor>
|
protected abstract val fileProcessor: List<FileProcessor>
|
||||||
protected abstract val progressBarFactory: ProgressBarFactory
|
private val progressBarFactory: ProgressBarFactory by inject()
|
||||||
suspend fun input(file: File) {
|
suspend fun input(file: File) {
|
||||||
var currentFile = file
|
var currentFile = file
|
||||||
fileProcessor.forEach { processor ->
|
fileProcessor.forEach { processor ->
|
||||||
val target = processor.willProduce(currentFile.toPath())
|
val target = processor.willProduce(currentFile.toPath())
|
||||||
if(target.exists()) {
|
if(target.exists() && !force) {
|
||||||
Log.info { "$target exists. Skipping" }
|
Log.info { "$target exists. Skipping" }
|
||||||
} else {
|
} else {
|
||||||
Log.info { "$target does not exists. Creating" }
|
Log.info { "$target does not exists. Creating" }
|
||||||
val resource = FileResource(currentFile)
|
val resource = FileResource(currentFile)
|
||||||
val progress = ProgressResource(resource, progressBarFactory)
|
val progress = ProgressResource(resource, progressBarFactory)
|
||||||
processor.process(progress)
|
processor.process(progress)
|
||||||
|
Log.info { "File created: $target" }
|
||||||
}
|
}
|
||||||
currentFile = target.toFile()
|
currentFile = target.toFile()
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,8 +2,10 @@ package de.itkl.fileprocessing
|
||||||
|
|
||||||
interface ProgressBarFactory {
|
interface ProgressBarFactory {
|
||||||
fun new(resource: Resource): ProgressBar
|
fun new(resource: Resource): ProgressBar
|
||||||
|
fun new(name: String, max: Long): ProgressBar
|
||||||
}
|
}
|
||||||
|
|
||||||
interface ProgressBar : AutoCloseable {
|
interface ProgressBar : AutoCloseable {
|
||||||
fun update(bytesRead: Long)
|
fun update(progressed: Long)
|
||||||
|
fun step()
|
||||||
}
|
}
|
||||||
|
|
@ -1,35 +0,0 @@
|
||||||
package de.itkl.processing
|
|
||||||
|
|
||||||
import kotlinx.coroutines.Dispatchers
|
|
||||||
import kotlinx.coroutines.flow.Flow
|
|
||||||
import kotlinx.coroutines.flow.flow
|
|
||||||
import kotlinx.coroutines.flow.map
|
|
||||||
import kotlinx.coroutines.flow.toList
|
|
||||||
import kotlinx.coroutines.runBlocking
|
|
||||||
import kotlinx.coroutines.withContext
|
|
||||||
import java.util.concurrent.Executors
|
|
||||||
import java.util.concurrent.TimeUnit
|
|
||||||
|
|
||||||
|
|
||||||
@Suppress("UNCHECKED_CAST")
|
|
||||||
class ParallelFlowProcessor<T,U>(
|
|
||||||
private val mapperFn: (T) -> U) {
|
|
||||||
companion object {
|
|
||||||
private val workers = Executors.newWorkStealingPool(16)
|
|
||||||
}
|
|
||||||
|
|
||||||
suspend fun process(flow: Flow<T>): Flow<U> {
|
|
||||||
return flow {
|
|
||||||
flow.map { kotlinx.coroutines.Runnable {
|
|
||||||
val result = mapperFn(it)
|
|
||||||
runBlocking { emit(result) }
|
|
||||||
} }
|
|
||||||
.map { job -> workers.submit(job)}
|
|
||||||
.toList()
|
|
||||||
.forEach { future -> emit(future.get() as U) }
|
|
||||||
withContext(Dispatchers.IO) {
|
|
||||||
workers.awaitTermination(10000, TimeUnit.DAYS)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -0,0 +1,43 @@
|
||||||
|
package de.itkl.processing
|
||||||
|
|
||||||
|
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||||
|
import kotlinx.coroutines.*
|
||||||
|
import kotlinx.coroutines.channels.Channel
|
||||||
|
import kotlinx.coroutines.channels.consumeEach
|
||||||
|
import kotlinx.coroutines.flow.*
|
||||||
|
|
||||||
|
private val Log = KotlinLogging.logger { }
|
||||||
|
class ParallelUnorderedFlow<U>(
|
||||||
|
private val mapperFlow: Flow<U>
|
||||||
|
) : Flow<U> {
|
||||||
|
override suspend fun collect(collector: FlowCollector<U>) {
|
||||||
|
mapperFlow.collect(collector)
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
suspend fun <T : Any, U : Any> Flow<T>.parallelUnordered(
|
||||||
|
scope: CoroutineScope,
|
||||||
|
numWorkers: Int,
|
||||||
|
mapperFn: (T) -> U): Flow<U> {
|
||||||
|
|
||||||
|
val producerChannel = Channel<T>()
|
||||||
|
|
||||||
|
scope.launch(Dispatchers.Default) {
|
||||||
|
collect {
|
||||||
|
producerChannel.send(it)
|
||||||
|
}
|
||||||
|
producerChannel.close()
|
||||||
|
}
|
||||||
|
|
||||||
|
val mapperFlow = channelFlow {
|
||||||
|
(0..numWorkers).map {
|
||||||
|
launch(Dispatchers.Default) {
|
||||||
|
producerChannel.consumeEach {
|
||||||
|
send(mapperFn(it))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ParallelUnorderedFlow(mapperFlow)
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,3 @@
|
||||||
|
dependencies {
|
||||||
|
implementation("io.ktor:ktor-http-jvm:2.3.7")
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,19 @@
|
||||||
|
package de.itkl.io.implementation
|
||||||
|
|
||||||
|
import de.itkl.io.interfaces.Resource
|
||||||
|
import io.ktor.http.*
|
||||||
|
import java.io.File
|
||||||
|
import java.io.InputStream
|
||||||
|
|
||||||
|
class FileSystemResource(private val file: File) : Resource() {
|
||||||
|
override val filename: String
|
||||||
|
get() = file.name
|
||||||
|
override val contentType: ContentType
|
||||||
|
get() = ContentType.fromFilePath(file.path).first()
|
||||||
|
override val length: Long
|
||||||
|
get() = file.length()
|
||||||
|
|
||||||
|
override fun doRead(): InputStream {
|
||||||
|
return file.inputStream()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,24 @@
|
||||||
|
package de.itkl.io.interfaces
|
||||||
|
|
||||||
|
import io.ktor.http.*
|
||||||
|
import org.koin.core.component.KoinComponent
|
||||||
|
import org.koin.core.component.get
|
||||||
|
import org.koin.core.qualifier.named
|
||||||
|
import java.io.InputStream
|
||||||
|
import java.io.InputStreamReader
|
||||||
|
|
||||||
|
abstract class Resource : KoinComponent {
|
||||||
|
abstract val filename: String
|
||||||
|
abstract val contentType: ContentType
|
||||||
|
abstract val length: Long?
|
||||||
|
|
||||||
|
protected abstract fun doRead(): InputStream
|
||||||
|
fun read(): InputStream {
|
||||||
|
return length?.let { length ->
|
||||||
|
get<ResourceReadDecorator>().decorate(
|
||||||
|
length = length,
|
||||||
|
read()
|
||||||
|
)
|
||||||
|
} ?: read()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,15 @@
|
||||||
|
package de.itkl.io.interfaces
|
||||||
|
|
||||||
|
import java.io.InputStream
|
||||||
|
|
||||||
|
interface ResourceReadDecorator {
|
||||||
|
fun decorate(
|
||||||
|
length: Long,
|
||||||
|
inputStream: InputStream): InputStream
|
||||||
|
}
|
||||||
|
|
||||||
|
class NoopResourceReadDecorator : ResourceReadDecorator {
|
||||||
|
override fun decorate(length: Long, inputStream: InputStream): InputStream {
|
||||||
|
return inputStream
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,9 @@
|
||||||
|
package de.itkl.io
|
||||||
|
|
||||||
|
import de.itkl.io.interfaces.NoopResourceReadDecorator
|
||||||
|
import de.itkl.io.interfaces.ResourceReadDecorator
|
||||||
|
import org.koin.dsl.module
|
||||||
|
|
||||||
|
val ioModule = module {
|
||||||
|
single<ResourceReadDecorator> { NoopResourceReadDecorator() }
|
||||||
|
}
|
||||||
|
|
@ -1,9 +1,6 @@
|
||||||
plugins {
|
|
||||||
id("docthor.kotlin-library-conventions")
|
|
||||||
}
|
|
||||||
|
|
||||||
dependencies {
|
dependencies {
|
||||||
api("org.apache.lucene:lucene-analysis-common:9.9.0")
|
api("org.apache.lucene:lucene-analysis-common:9.9.0")
|
||||||
implementation("com.github.doyaaaaaken:kotlin-csv-jvm:1.9.2")
|
implementation("com.github.doyaaaaaken:kotlin-csv-jvm:1.9.2")
|
||||||
|
implementation("com.google.guava:guava:32.1.3-jre")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,8 @@ class BagOfWords(private val data: MutableSet<String> = mutableSetOf()) : Iterab
|
||||||
}
|
}
|
||||||
|
|
||||||
fun join(bagOfWords: BagOfWords): BagOfWords {
|
fun join(bagOfWords: BagOfWords): BagOfWords {
|
||||||
return BagOfWords(data.toMutableSet().apply { addAll(bagOfWords.data) })
|
data.addAll(bagOfWords.data)
|
||||||
|
return this
|
||||||
}
|
}
|
||||||
|
|
||||||
override fun iterator(): Iterator<String> {
|
override fun iterator(): Iterator<String> {
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,4 @@
|
||||||
|
package de.itkl.textprocessing
|
||||||
|
|
||||||
|
class DocumentContainer {
|
||||||
|
}
|
||||||
|
|
@ -11,10 +11,16 @@ class Histogram(private val histo: MutableMap<String,UInt> = mutableMapOf()) : I
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fun fromBagOfWords(bagOfWords: BagOfWords): Histogram {
|
||||||
|
val result = Histogram()
|
||||||
|
bagOfWords.forEach(result::add)
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
suspend fun fromBagOfWords(flow: Flow<BagOfWords>): Histogram {
|
suspend fun fromBagOfWords(flow: Flow<BagOfWords>): Histogram {
|
||||||
val result = Histogram()
|
val result = Histogram()
|
||||||
flow.collectIndexed { index, value ->
|
flow.collect() { value ->
|
||||||
println(index)
|
|
||||||
value.forEach(result::add)
|
value.forEach(result::add)
|
||||||
}
|
}
|
||||||
return result
|
return result
|
||||||
|
|
@ -27,12 +33,24 @@ class Histogram(private val histo: MutableMap<String,UInt> = mutableMapOf()) : I
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fun join(other: Histogram): Histogram {
|
||||||
|
other.forEach { (word, count) ->
|
||||||
|
histo.merge(word, count) { a,b -> a + b }
|
||||||
|
}
|
||||||
|
return this
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
fun add(word: String) {
|
fun add(word: String) {
|
||||||
histo.compute(word) { _, count ->
|
histo.compute(word) { _, count ->
|
||||||
count?.let { it + 1u } ?: 1u
|
count?.let { it + 1u } ?: 1u
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fun set(word: String, count: Int) {
|
||||||
|
histo[word] = count.toUInt()
|
||||||
|
}
|
||||||
|
|
||||||
val size get() = histo.size
|
val size get() = histo.size
|
||||||
override fun iterator(): Iterator<Pair<String, UInt>> {
|
override fun iterator(): Iterator<Pair<String, UInt>> {
|
||||||
return iterator {
|
return iterator {
|
||||||
|
|
|
||||||
|
|
@ -1,22 +1,19 @@
|
||||||
package de.itkl.textprocessing
|
package de.itkl.textprocessing
|
||||||
|
|
||||||
|
import kotlinx.coroutines.Dispatchers
|
||||||
import kotlinx.coroutines.flow.Flow
|
import kotlinx.coroutines.flow.Flow
|
||||||
import kotlinx.coroutines.flow.flow
|
import kotlinx.coroutines.flow.flow
|
||||||
import kotlinx.coroutines.flow.onCompletion
|
import kotlinx.coroutines.flow.onCompletion
|
||||||
import org.apache.lucene.analysis.standard.StandardTokenizer
|
import kotlinx.coroutines.withContext
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute
|
|
||||||
import org.apache.lucene.util.AttributeFactory
|
|
||||||
import java.io.File
|
|
||||||
import java.io.InputStream
|
import java.io.InputStream
|
||||||
import java.io.InputStreamReader
|
import java.io.InputStreamReader
|
||||||
|
|
||||||
|
|
||||||
class TextFile(val inputStream: InputStream) {
|
class TextFile(private val inputStream: InputStream) {
|
||||||
|
|
||||||
fun splitByEmptyLines(): Flow<List<String>> {
|
fun splitByEmptyLines(): Flow<List<String>> {
|
||||||
val reader = InputStreamReader(inputStream)
|
val reader = InputStreamReader(inputStream)
|
||||||
var list = mutableListOf<String>()
|
var list = mutableListOf<String>()
|
||||||
return flow {
|
return flow<List<String>> {
|
||||||
reader.useLines { lines ->
|
reader.useLines { lines ->
|
||||||
lines.forEach { line ->
|
lines.forEach { line ->
|
||||||
if(line.isEmpty()) {
|
if(line.isEmpty()) {
|
||||||
|
|
@ -27,21 +24,9 @@ class TextFile(val inputStream: InputStream) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}.onCompletion {
|
||||||
|
withContext(Dispatchers.IO) {
|
||||||
|
reader.close()
|
||||||
|
} }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// fun words(progressOp: (read: Long) -> Unit = {}): Flow<String> {
|
|
||||||
// val factory = AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY
|
|
||||||
// val tokenizer = StandardTokenizer(factory)
|
|
||||||
// val reader = ProgressInputStream(file.inputStream(), progressOp)
|
|
||||||
// tokenizer.setReader(InputStreamReader(reader))
|
|
||||||
// tokenizer.reset()
|
|
||||||
// val attr = tokenizer.addAttribute(CharTermAttribute::class.java)
|
|
||||||
// return flow {
|
|
||||||
// while (kotlin.runCatching { tokenizer.incrementToken() }.getOrElse { true } ) {
|
|
||||||
// emit(attr.toString())
|
|
||||||
// }
|
|
||||||
// }.onCompletion {
|
|
||||||
// tokenizer.close()
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
}
|
|
||||||
|
|
@ -1,23 +1,21 @@
|
||||||
package de.itkl.textprocessing
|
package de.itkl.textprocessing.implementation
|
||||||
|
|
||||||
import kotlinx.coroutines.flow.Flow
|
import de.itkl.textprocessing.interfaces.Tokenizer
|
||||||
import kotlinx.coroutines.flow.flow
|
|
||||||
import kotlinx.coroutines.flow.onCompletion
|
|
||||||
import org.apache.lucene.analysis.standard.StandardTokenizer
|
import org.apache.lucene.analysis.standard.StandardTokenizer
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute
|
||||||
import org.apache.lucene.util.AttributeFactory
|
import org.apache.lucene.util.AttributeFactory
|
||||||
import java.io.StringReader
|
import java.io.StringReader
|
||||||
|
|
||||||
|
|
||||||
class Tokenizer {
|
class LuceneTokenizer : Tokenizer {
|
||||||
|
|
||||||
private val tokenizer by lazy {
|
private val tokenizer by lazy {
|
||||||
val factory = AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY
|
val factory = AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY
|
||||||
val tokenizer = StandardTokenizer(factory)
|
val tokenizer = StandardTokenizer(factory)
|
||||||
tokenizer
|
tokenizer
|
||||||
}
|
}
|
||||||
fun tokenize(input: String): Sequence<String> {
|
override fun tokenize(text: String): Sequence<String> {
|
||||||
val reader = StringReader(input)
|
val reader = StringReader(text)
|
||||||
tokenizer.setReader(reader)
|
tokenizer.setReader(reader)
|
||||||
tokenizer.reset()
|
tokenizer.reset()
|
||||||
val attr = tokenizer.addAttribute(CharTermAttribute::class.java)
|
val attr = tokenizer.addAttribute(CharTermAttribute::class.java)
|
||||||
|
|
@ -0,0 +1,13 @@
|
||||||
|
package de.itkl.textprocessing.implementation
|
||||||
|
|
||||||
|
import de.itkl.textprocessing.interfaces.Stemmer
|
||||||
|
import org.tartarus.snowball.ext.GermanStemmer
|
||||||
|
|
||||||
|
class SnowballStemmerGerman : Stemmer {
|
||||||
|
private val german = GermanStemmer()
|
||||||
|
override fun stem(word: String): String {
|
||||||
|
german.current = word
|
||||||
|
german.stem()
|
||||||
|
return german.current
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,3 @@
|
||||||
|
package de.itkl.textprocessing.interfaces
|
||||||
|
|
||||||
|
interface DocumentAssetManager {}
|
||||||
|
|
@ -0,0 +1,4 @@
|
||||||
|
package de.itkl.textprocessing.interfaces
|
||||||
|
|
||||||
|
interface DocumentExtractor {
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,5 @@
|
||||||
|
package de.itkl.textprocessing.interfaces
|
||||||
|
|
||||||
|
interface Stemmer {
|
||||||
|
fun stem(word: String): String
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,5 @@
|
||||||
|
package de.itkl.textprocessing.interfaces
|
||||||
|
|
||||||
|
interface Tokenizer {
|
||||||
|
fun tokenize(text: String): Sequence<String>
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,12 @@
|
||||||
|
package de.itkl.textprocessing
|
||||||
|
|
||||||
|
import de.itkl.textprocessing.implementation.LuceneTokenizer
|
||||||
|
import de.itkl.textprocessing.implementation.SnowballStemmerGerman
|
||||||
|
import de.itkl.textprocessing.interfaces.Stemmer
|
||||||
|
import de.itkl.textprocessing.interfaces.Tokenizer
|
||||||
|
import org.koin.dsl.module
|
||||||
|
|
||||||
|
val textProcessingModule = module {
|
||||||
|
factory<Tokenizer> { LuceneTokenizer() }
|
||||||
|
factory<Stemmer> { SnowballStemmerGerman() }
|
||||||
|
}
|
||||||
|
|
@ -1,10 +1,7 @@
|
||||||
plugins {
|
|
||||||
id("docthor.kotlin-library-conventions")
|
|
||||||
}
|
|
||||||
|
|
||||||
dependencies {
|
dependencies {
|
||||||
api(project(":libraries:textprocessing"))
|
api(project(":libraries:textprocessing"))
|
||||||
api(project(":libraries:fileprocessing"))
|
api(project(":libraries:fileprocessing"))
|
||||||
implementation("com.github.ajalt.mordant:mordant:2.2.0")
|
implementation("com.github.ajalt.mordant:mordant:2.2.0")
|
||||||
implementation("com.github.doyaaaaaken:kotlin-csv-jvm:1.9.2")
|
implementation("com.github.doyaaaaaken:kotlin-csv-jvm:1.9.2")
|
||||||
|
implementation("com.google.guava:guava:32.1.3-jre")
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,57 @@
|
||||||
|
package de.itkl.tfidf
|
||||||
|
|
||||||
|
import de.itkl.fileprocessing.FileProcessor
|
||||||
|
import de.itkl.fileprocessing.Resource
|
||||||
|
import de.itkl.processing.parallelUnordered
|
||||||
|
import de.itkl.textprocessing.*
|
||||||
|
import de.itkl.textprocessing.interfaces.Stemmer
|
||||||
|
import de.itkl.textprocessing.interfaces.Tokenizer
|
||||||
|
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||||
|
import kotlinx.coroutines.*
|
||||||
|
import kotlinx.coroutines.flow.*
|
||||||
|
import org.koin.core.component.KoinComponent
|
||||||
|
import org.koin.core.component.inject
|
||||||
|
import java.io.File
|
||||||
|
import java.nio.file.Path
|
||||||
|
import kotlin.io.path.nameWithoutExtension
|
||||||
|
import kotlin.math.max
|
||||||
|
|
||||||
|
private val Log = KotlinLogging.logger { }
|
||||||
|
|
||||||
|
class DocumentFrequency : FileProcessor, KoinComponent {
|
||||||
|
override fun willProduce(path: Path): Path {
|
||||||
|
return path.parent.resolve(path.nameWithoutExtension + "-document-frequency.csv")
|
||||||
|
}
|
||||||
|
|
||||||
|
override suspend fun process(resource: Resource): File = coroutineScope {
|
||||||
|
Log.info { "Would produce: ${willProduce(resource.path)}" }
|
||||||
|
val resultFile = willProduce(resource.path).toFile()
|
||||||
|
val (numDocs, histogram) = TextFile(resource.read())
|
||||||
|
.splitByEmptyLines()
|
||||||
|
.withIndex()
|
||||||
|
.parallelUnordered(this, 16) { (index, doc) ->
|
||||||
|
val result = collectWordsOfDocument(doc)
|
||||||
|
index to result
|
||||||
|
}
|
||||||
|
.reduce { (index, acc), (otherIndex, other) -> max(index, otherIndex) to acc.join(other)}
|
||||||
|
Log.info { "Writing CSV $resultFile" }
|
||||||
|
histogram.set("\$numDocs", numDocs)
|
||||||
|
HistogramCsvStorage().save(histogram, resultFile)
|
||||||
|
resultFile
|
||||||
|
}
|
||||||
|
|
||||||
|
private fun collectWordsOfDocument(document: List<String>): Histogram {
|
||||||
|
if (document.isEmpty()) {
|
||||||
|
return Histogram()
|
||||||
|
}
|
||||||
|
val tokenizer: Tokenizer by inject()
|
||||||
|
val stemmer: Stemmer by inject()
|
||||||
|
val bagOfWords = document.map { line ->
|
||||||
|
val tokens = tokenizer.tokenize(line)
|
||||||
|
BagOfWords.from(tokens.map { stemmer.stem(it) })
|
||||||
|
}
|
||||||
|
.reduce { acc, bagOfWords -> acc.join(bagOfWords) }
|
||||||
|
return Histogram.fromBagOfWords(bagOfWords)
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -1,46 +0,0 @@
|
||||||
package de.itkl.tfidf
|
|
||||||
|
|
||||||
import com.github.ajalt.mordant.terminal.Terminal
|
|
||||||
import de.itkl.fileprocessing.FileProcessor
|
|
||||||
import de.itkl.fileprocessing.Resource
|
|
||||||
import de.itkl.processing.ParallelFlowProcessor
|
|
||||||
import de.itkl.textprocessing.*
|
|
||||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
|
||||||
import kotlinx.coroutines.flow.map
|
|
||||||
import kotlinx.coroutines.flow.reduce
|
|
||||||
import kotlinx.coroutines.flow.take
|
|
||||||
import java.io.File
|
|
||||||
import java.nio.file.Path
|
|
||||||
import kotlin.io.path.nameWithoutExtension
|
|
||||||
|
|
||||||
private val Log = KotlinLogging.logger { }
|
|
||||||
|
|
||||||
class Idf : FileProcessor {
|
|
||||||
override fun willProduce(path: Path): Path {
|
|
||||||
return path.parent.resolve(path.nameWithoutExtension + "-idf.csv")
|
|
||||||
}
|
|
||||||
|
|
||||||
override suspend fun process(resource: Resource): File {
|
|
||||||
Log.info { "Would produce: ${willProduce(resource.path)}" }
|
|
||||||
val resultFile = willProduce(resource.path).toFile()
|
|
||||||
val textFile = TextFile(resource.read())
|
|
||||||
val documents = textFile.splitByEmptyLines()
|
|
||||||
val bagOfWords = ParallelFlowProcessor<List<String>, BagOfWords>(
|
|
||||||
mapperFn = { document ->
|
|
||||||
val tokenizer = Tokenizer()
|
|
||||||
val bagOfWords = document.map { line ->
|
|
||||||
val tokens = tokenizer.tokenize(line)
|
|
||||||
BagOfWords.from(tokens)
|
|
||||||
}
|
|
||||||
.reduce { acc, bagOfWords -> acc.join(bagOfWords) }
|
|
||||||
bagOfWords
|
|
||||||
}
|
|
||||||
).process(documents)
|
|
||||||
|
|
||||||
|
|
||||||
val histogram = Histogram.fromBagOfWords(bagOfWords)
|
|
||||||
HistogramCsvStorage().save(histogram, resultFile)
|
|
||||||
return resultFile
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
@ -0,0 +1,47 @@
|
||||||
|
package de.itkl.tfidf
|
||||||
|
|
||||||
|
import com.github.doyaaaaaken.kotlincsv.dsl.csvWriter
|
||||||
|
import de.itkl.fileprocessing.FileProcessor
|
||||||
|
import de.itkl.fileprocessing.ProgressBarFactory
|
||||||
|
import de.itkl.fileprocessing.Resource
|
||||||
|
import de.itkl.textprocessing.HistogramCsvStorage
|
||||||
|
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||||
|
import org.koin.core.component.KoinComponent
|
||||||
|
import org.koin.core.component.inject
|
||||||
|
import java.io.File
|
||||||
|
import java.nio.file.Path
|
||||||
|
import kotlin.io.path.nameWithoutExtension
|
||||||
|
import kotlin.math.ln
|
||||||
|
import kotlin.math.log
|
||||||
|
import kotlin.math.log10
|
||||||
|
import kotlin.math.log2
|
||||||
|
|
||||||
|
private val Log = KotlinLogging.logger { }
|
||||||
|
|
||||||
|
class InverseDocumentFrequency : FileProcessor, KoinComponent {
|
||||||
|
override fun willProduce(path: Path): Path {
|
||||||
|
return path.parent.resolve(path.nameWithoutExtension + "-inverse-document-frequency.csv")
|
||||||
|
}
|
||||||
|
|
||||||
|
override suspend fun process(resource: Resource): File {
|
||||||
|
val histogram = HistogramCsvStorage().read(resource.toFile())
|
||||||
|
val numDocs = histogram
|
||||||
|
.find { (word, count) -> word == "\$numDocs" }!!
|
||||||
|
.second.toInt()
|
||||||
|
val progressBarFactory: ProgressBarFactory by inject()
|
||||||
|
return progressBarFactory.new("compute idf", histogram.size.toLong()).use { progess ->
|
||||||
|
csvWriter().openAsync(willProduce(resource.path).toFile(), append = false) {
|
||||||
|
writeRow("word", "idf")
|
||||||
|
histogram.forEach { (word, count) ->
|
||||||
|
writeRow(word, idf(numDocs, count))
|
||||||
|
progess.step()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
resource.path.toFile()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private fun idf(numDocs: Int, count: UInt): Double {
|
||||||
|
return log10(numDocs / count.toDouble())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -19,6 +19,18 @@ class TerminalProgressBarFactory : ProgressBarFactory {
|
||||||
}
|
}
|
||||||
return TerminalProgressBar(animation, resource.length())
|
return TerminalProgressBar(animation, resource.length())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Creates a terminal progress bar labelled [name] that completes after
 * [max] steps.
 */
override fun new(name: String, max: Long): ProgressBar {
    val bar = terminal.progressAnimation {
        text(name)
        percentage()
        progressBar()
        completed()
        timeRemaining()
    }
    return TerminalProgressBar(bar, max)
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
class TerminalProgressBar(
|
class TerminalProgressBar(
|
||||||
|
|
@ -28,11 +40,16 @@ class TerminalProgressBar(
|
||||||
animation.start()
|
animation.start()
|
||||||
animation.updateTotal(total)
|
animation.updateTotal(total)
|
||||||
}
|
}
|
||||||
/** Sets the absolute progress of the animation to [progressed] units. */
override fun update(progressed: Long) {
    animation.update(progressed)
}
|
||||||
|
|
||||||
|
/** Advances the progress bar by a single step. */
override fun step() {
    animation.advance()
}
|
||||||
|
|
||||||
/** Stops the animation and emits a newline so later output starts on a fresh line. */
override fun close() {
    animation.stop()
    println()
}
|
||||||
}
|
}
|
||||||
|
|
@ -1,9 +0,0 @@
|
||||||
package de.itkl.tfidf
|
|
||||||
|
|
||||||
import com.github.doyaaaaaken.kotlincsv.dsl.csvReader
|
|
||||||
import com.github.doyaaaaaken.kotlincsv.dsl.csvWriter
|
|
||||||
import de.itkl.textprocessing.Histogram
|
|
||||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
|
||||||
import java.io.File
|
|
||||||
|
|
||||||
private val Log = KotlinLogging.logger { }
|
|
||||||
|
|
@ -1,55 +0,0 @@
|
||||||
package de.itkl.tfidf
|
|
||||||
|
|
||||||
import com.github.ajalt.mordant.terminal.Terminal
|
|
||||||
import de.itkl.textprocessing.Histogram
|
|
||||||
import de.itkl.textprocessing.HistogramCsvStorage
|
|
||||||
import de.itkl.textprocessing.TextFile
|
|
||||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
|
||||||
import kotlinx.coroutines.flow.map
|
|
||||||
import org.tartarus.snowball.SnowballStemmer
|
|
||||||
import org.tartarus.snowball.ext.GermanStemmer
|
|
||||||
import java.io.File
|
|
||||||
import kotlin.io.path.exists
|
|
||||||
|
|
||||||
|
|
||||||
private val Log = KotlinLogging.logger { }
|
|
||||||
//class TfIdf {
|
|
||||||
// suspend fun computeTf(
|
|
||||||
// corpus: File,
|
|
||||||
// language: Language
|
|
||||||
// ): Histogram {
|
|
||||||
// Log.info { "Processing $corpus" }
|
|
||||||
// val destination = corpus.toPath().parent.resolve("${corpus.nameWithoutExtension}-terms.csv")
|
|
||||||
//
|
|
||||||
// if(destination.exists()) {
|
|
||||||
// return HistogramCsvStorage().read(destination.toFile())
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// val filesize = corpus.length()
|
|
||||||
//
|
|
||||||
// val t = Terminal()
|
|
||||||
// val histogram = t.progressBar("Indexing ${corpus.name}", filesize) { val stemmer = stemmer(language)
|
|
||||||
// val words = TextFile(corpus).words {readBytes -> update(readBytes)}
|
|
||||||
// .map { stemmer.stem(it) }
|
|
||||||
// Histogram.from(words)
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// t.progressBar("Saving ${histogram.size} entries", histogram.size.toLong()) {
|
|
||||||
// HistogramCsvStorage()
|
|
||||||
// .save(histogram,destination.toFile()) { entriesWritten -> update(entriesWritten)}
|
|
||||||
// }
|
|
||||||
// return histogram
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// private fun stemmer(language: Language): SnowballStemmer {
|
|
||||||
// return when(language) {
|
|
||||||
// Language.DE -> GermanStemmer()
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// private fun SnowballStemmer.stem(word: String): String {
|
|
||||||
// current = word
|
|
||||||
// stem()
|
|
||||||
// return current
|
|
||||||
// }
|
|
||||||
//}
|
|
||||||
|
|
@ -3,11 +3,11 @@ package de.itkl.tfidf
|
||||||
import de.itkl.fileprocessing.FileProcessingPipeline
|
import de.itkl.fileprocessing.FileProcessingPipeline
|
||||||
import de.itkl.fileprocessing.FileProcessor
|
import de.itkl.fileprocessing.FileProcessor
|
||||||
import de.itkl.fileprocessing.ProgressBarFactory
|
import de.itkl.fileprocessing.ProgressBarFactory
|
||||||
|
import org.koin.core.component.KoinComponent
|
||||||
|
|
||||||
/**
 * File-processing pipeline that computes document frequency and inverse
 * document frequency for a corpus. When [force] is true, the base pipeline
 * recomputes outputs even if they already exist.
 */
class TfIdfPipeline(force: Boolean) : FileProcessingPipeline(force) {

    // Stages executed by the pipeline, in declaration order.
    override val fileProcessor: List<FileProcessor> = listOf(
        DocumentFrequency(),
        InverseDocumentFrequency(),
    )
}
|
||||||
|
|
@ -1,21 +0,0 @@
|
||||||
package de.itkl.tfidf
|
|
||||||
|
|
||||||
import com.github.ajalt.mordant.animation.ProgressAnimation
|
|
||||||
import com.github.ajalt.mordant.animation.progressAnimation
|
|
||||||
import com.github.ajalt.mordant.terminal.Terminal
|
|
||||||
import java.awt.SystemColor.text
|
|
||||||
|
|
||||||
suspend fun <T> Terminal.progressBar(name: String, overall: Long, context: suspend ProgressAnimation.() -> T):T {
|
|
||||||
val progress = progressAnimation {
|
|
||||||
text(name)
|
|
||||||
percentage()
|
|
||||||
progressBar()
|
|
||||||
completed()
|
|
||||||
timeRemaining()
|
|
||||||
}
|
|
||||||
progress.start()
|
|
||||||
progress.updateTotal(overall)
|
|
||||||
val result = context(progress)
|
|
||||||
progress.stop()
|
|
||||||
return result
|
|
||||||
}
|
|
||||||
|
|
@ -1,15 +1,25 @@
|
||||||
pluginManagement {
|
//pluginManagement {
|
||||||
includeBuild("build-logic")
|
// includeBuild("build-logic")
|
||||||
}
|
//}
|
||||||
|
|
||||||
plugins {
|
plugins {
|
||||||
id("org.gradle.toolchains.foojay-resolver-convention") version "0.4.0"
|
id("org.gradle.toolchains.foojay-resolver-convention") version "0.4.0"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Includes every direct subdirectory of [path] that contains a
 * `build.gradle.kts` as a Gradle subproject (e.g. "libraries:tfidf").
 */
fun includeDir(path: String) {
    // requireNotNull instead of `!!`: listFiles() returns null when the path
    // does not exist or is not a directory — fail with a useful message.
    val children = requireNotNull(file(path).listFiles()) {
        "'$path' does not exist or is not a listable directory"
    }
    children
        .filter { it.isDirectory }
        .filter { dir -> dir.resolve("build.gradle.kts").exists() }
        .forEach { dir ->
            include("$path:${dir.name}")
        }
}
|
||||||
|
|
||||||
rootProject.name = "docthor"
|
rootProject.name = "docthor"
|
||||||
include(
|
include(
|
||||||
"app",
|
"app",
|
||||||
"libraries:tfidf",
|
|
||||||
"libraries:textprocessing",
|
|
||||||
"libraries:fileprocessing",
|
|
||||||
)
|
)
|
||||||
|
includeDir("libraries")
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue