diff --git a/buildSrc/build.gradle.kts b/buildSrc/build.gradle.kts
index 37d2b48..691a9fc 100644
--- a/buildSrc/build.gradle.kts
+++ b/buildSrc/build.gradle.kts
@@ -7,5 +7,5 @@ repositories {
 }
 
 dependencies {
-    implementation("org.jetbrains.kotlin:kotlin-gradle-plugin:1.8.20")
+    implementation("org.jetbrains.kotlin:kotlin-gradle-plugin:$embeddedKotlinVersion")
 }
diff --git a/buildSrc/src/main/kotlin/docthor.kotlin-common-conventions.gradle.kts b/buildSrc/src/main/kotlin/docthor.kotlin-common-conventions.gradle.kts
index 03ff015..ab74060 100644
--- a/buildSrc/src/main/kotlin/docthor.kotlin-common-conventions.gradle.kts
+++ b/buildSrc/src/main/kotlin/docthor.kotlin-common-conventions.gradle.kts
@@ -13,6 +13,11 @@ dependencies {
     val koin_version = "3.5.3"
     implementation("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.7.3")
     implementation("io.insert-koin:koin-core:$koin_version")
+    implementation("org.jetbrains.kotlinx:kotlinx-datetime:0.5.0")
+    implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.6.2")
+
+
+    testImplementation("io.insert-koin:koin-test:$koin_version")
 }
 
 java {
diff --git a/libraries/clients/build.gradle.kts b/libraries/clients/build.gradle.kts
deleted file mode 100644
index e69de29..0000000
diff --git a/libraries/clients/src/main/kotlin/de/itkl/clients/MsOcr.kt b/libraries/clients/src/main/kotlin/de/itkl/clients/MsOcr.kt
deleted file mode 100644
index f2a5ac7..0000000
--- a/libraries/clients/src/main/kotlin/de/itkl/clients/MsOcr.kt
+++ /dev/null
@@ -1,6 +0,0 @@
-package de.itkl.clients
-
-class MsOcr {
-
-    suspend fun ocr() {}
-}
\ No newline at end of file
diff --git a/libraries/core-api/src/main/kotlin/de/itkl/core_api/interfaces/data/DataTable.kt b/libraries/core-api/src/main/kotlin/de/itkl/core_api/interfaces/data/DataTable.kt
new file mode 100644
index 0000000..196bb02
--- /dev/null
+++ b/libraries/core-api/src/main/kotlin/de/itkl/core_api/interfaces/data/DataTable.kt
@@ -0,0 +1,4 @@
+package de.itkl.core_api.interfaces.data
+interface DataTable : Iterable> {
+    val columns: List
+}
\ No newline at end of file
diff --git a/libraries/httpClient/build.gradle.kts b/libraries/httpClient/build.gradle.kts
new file mode 100644
index 0000000..5ee7f02
--- /dev/null
+++ b/libraries/httpClient/build.gradle.kts
@@ -0,0 +1,15 @@
+plugins {
+    kotlin("plugin.serialization") version embeddedKotlinVersion
+}
+
+val ktorVersion: String by project
+
+dependencies {
+    api(project(":libraries:core-api"))
+
+    api("io.ktor:ktor-client-core:$ktorVersion")
+    api("io.ktor:ktor-client-core-jvm:$ktorVersion")
+    implementation("io.ktor:ktor-client-cio:$ktorVersion")
+    implementation("io.ktor:ktor-client-content-negotiation:$ktorVersion")
+    implementation("io.ktor:ktor-serialization-kotlinx-json:$ktorVersion")
+}
\ No newline at end of file
diff --git a/libraries/httpClient/gradle.properties b/libraries/httpClient/gradle.properties
new file mode 100644
index 0000000..79e625c
--- /dev/null
+++ b/libraries/httpClient/gradle.properties
@@ -0,0 +1 @@
+ktorVersion=2.3.7
\ No newline at end of file
diff --git a/libraries/httpClient/src/main/kotlin/de/itkl/httpClient/clients/MsOcr.kt b/libraries/httpClient/src/main/kotlin/de/itkl/httpClient/clients/MsOcr.kt
new file mode 100644
index 0000000..ea56045
--- /dev/null
+++ b/libraries/httpClient/src/main/kotlin/de/itkl/httpClient/clients/MsOcr.kt
@@ -0,0 +1,43 @@
+package de.itkl.httpClient.clients
+
+import de.itkl.core_api.interfaces.Resource
+import io.github.oshai.kotlinlogging.KotlinLogging
+import io.ktor.client.*
+import io.ktor.client.call.*
+import io.ktor.client.request.*
+import io.ktor.client.statement.*
+import io.ktor.http.*
+import org.koin.core.component.KoinComponent
+import org.koin.core.component.inject
+
+private val Log = KotlinLogging.logger { }
+
+/**
+ * Client for the synchronous `read/syncAnalyze` OCR endpoint.
+ *
+ * The [HttpClient] used for the call is resolved lazily through Koin.
+ */
+class MsOcr: KoinComponent {
+    private val httpClient: HttpClient by inject()
+
+    /**
+     * Posts the content of [resource] to the OCR endpoint with the query parameters
+     * `language=de` and `readingOrder=natural`.
+     *
+     * @param resource the document to analyse; its content type becomes the `Content-Type` header
+     * @return the response body decoded as [MsOcrResponse]
+     */
+    suspend fun ocr(resource: Resource): MsOcrResponse {
+        val response = httpClient.post {
+            url("http://10.54.150.152:5000/vision/v3.2/read/syncAnalyze")
+            // A bare `parameters { }` block resolves to io.ktor.http.parameters(), which only
+            // builds a detached Parameters object; `parameter` writes into the request URL.
+            parameter("language", "de")
+            parameter("readingOrder", "natural")
+            contentType(resource.contentType)
+            setBody(resource.read())
+        }
+        Log.info { "got response: ${response.status} in ${response.responseTime}" }
+        return response.body()
+    }
+}
\ No newline at end of file
diff --git a/libraries/httpClient/src/main/kotlin/de/itkl/httpClient/clients/MsOcrResponse.kt b/libraries/httpClient/src/main/kotlin/de/itkl/httpClient/clients/MsOcrResponse.kt
new file mode 100644
index 0000000..835cad6
--- /dev/null
+++ b/libraries/httpClient/src/main/kotlin/de/itkl/httpClient/clients/MsOcrResponse.kt
@@ -0,0 +1,81 @@
+package de.itkl.httpClient.clients
+
+
+import kotlinx.datetime.Instant
+import kotlinx.datetime.LocalDateTime
+import kotlinx.serialization.SerialName
+import kotlinx.serialization.Serializable
+
+@Serializable
+data class MsOcrResponse(
+    @SerialName("analyzeResult")
+    val analyzeResult: AnalyzeResult,
+    @SerialName("createdDateTime")
+    val createdDateTime: Instant, // 2023-12-29T21:02:30Z
+    @SerialName("lastUpdatedDateTime")
+    val lastUpdatedDateTime: Instant, // 2023-12-29T21:02:31Z
+    @SerialName("status")
+    val status: String // succeeded
+) {
+    @Serializable
+    data class AnalyzeResult(
+        @SerialName("modelVersion")
+        val modelVersion: String, // 2022-04-30
+        @SerialName("readResults")
+        val readResults: List,
+        @SerialName("version")
+        val version: String // 3.2.0
+    ) {
+        @Serializable
+        data class ReadResult(
+            @SerialName("angle")
+            val angle: Int, // 0
+            @SerialName("height")
+            val height: Int, // 3507
+            @SerialName("lines")
+            val lines: List,
+            @SerialName("page")
+            val page: Int, // 1
+            @SerialName("unit")
+            val unit: String, // pixel
+            @SerialName("width")
+            val width: Int // 2481
+        ) {
+            @Serializable
+            data class Line(
+                @SerialName("appearance")
+                val appearance: Appearance,
+                @SerialName("boundingBox")
+                val boundingBox: List,
+                @SerialName("text")
+                val text: String, // Franz Mustermann
+                @SerialName("words")
+                val words: List
+            ) {
+                @Serializable
+                data class Appearance(
+                    @SerialName("style")
+                    val style: Style
+                ) {
+                    @Serializable
+                    data class Style(
+                        @SerialName("confidence")
+                        val confidence: Double, // 0.972
+                        @SerialName("name")
+                        val name: String // other
+                    )
+                }
+
+                @Serializable
+                data class Word(
+                    @SerialName("boundingBox")
+                    val boundingBox: List,
+                    @SerialName("confidence")
+                    val confidence: Double, // 0.998
+                    @SerialName("text")
+                    val text: String // Franz
+                )
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/libraries/httpClient/src/main/kotlin/de/itkl/httpClient/createHttpClient.kt b/libraries/httpClient/src/main/kotlin/de/itkl/httpClient/createHttpClient.kt
new file mode 100644
index 0000000..5756468
--- /dev/null
+++ b/libraries/httpClient/src/main/kotlin/de/itkl/httpClient/createHttpClient.kt
@@ -0,0 +1,14 @@
+package de.itkl.httpClient
+
+import io.ktor.client.*
+import io.ktor.client.engine.cio.*
+import io.ktor.client.plugins.contentnegotiation.*
+import io.ktor.serialization.kotlinx.json.*
+
+fun createHttpClient(): HttpClient {
+    return HttpClient(CIO) {
+        install(ContentNegotiation) {
+            json()
+        }
+    }
+}
\ No newline at end of file
diff --git a/libraries/httpClient/src/main/kotlin/de/itkl/httpClient/httpClientModule.kt b/libraries/httpClient/src/main/kotlin/de/itkl/httpClient/httpClientModule.kt
new file mode 100644
index 0000000..1778158
--- /dev/null
+++ b/libraries/httpClient/src/main/kotlin/de/itkl/httpClient/httpClientModule.kt
@@ -0,0 +1,10 @@
+package de.itkl.httpClient
+
+import de.itkl.httpClient.clients.MsOcr
+import io.ktor.client.*
+import org.koin.dsl.module
+
+val httpClientModule = module {
+    single { createHttpClient() }
+    single { MsOcr() }
+}
\ No newline at end of file
diff --git a/libraries/httpClient/src/test/kotlin/de/itkl/httpClient/clients/MsOcrTest.kt b/libraries/httpClient/src/test/kotlin/de/itkl/httpClient/clients/MsOcrTest.kt
new file mode 100644
index 0000000..10124b4
--- /dev/null
+++ b/libraries/httpClient/src/test/kotlin/de/itkl/httpClient/clients/MsOcrTest.kt
@@ -0,0 +1,44 @@
+package de.itkl.httpClient.clients
+
+import de.itkl.core_api.coreApiModule
+import de.itkl.core_api.implementation.FileResource
+import de.itkl.core_api.interfaces.Resource
+import de.itkl.httpClient.httpClientModule
+import kotlinx.coroutines.runBlocking
+import org.junit.Rule
+import org.junit.jupiter.api.AfterEach
+import org.junit.jupiter.api.BeforeEach
+import org.junit.jupiter.api.Test
+import org.koin.core.component.inject
+import org.koin.core.context.startKoin
+import org.koin.core.context.stopKoin
+import org.koin.test.KoinTest
+import java.nio.file.Paths
+
+class MsOcrTest : KoinTest {
+
+    @BeforeEach
+    fun start() {
+        startKoin {
+            printLogger()
+            modules(
+                coreApiModule,
+                httpClientModule)
+        }
+    }
+
+    // Release the global Koin context so that the next startKoin() call does not fail.
+    @AfterEach
+    fun stop() {
+        stopKoin()
+    }
+
+    @Test
+    fun `can create a request`() = runBlocking {
+        val msOcrClient: MsOcr by inject()
+        val resource = FileResource(Paths.get("../../assets/xs-reg/00001.jpg").toAbsolutePath())
+        val response = msOcrClient.ocr(resource)
+        println(response)
+        Unit
+    }
+}
\ No newline at end of file
diff --git a/libraries/textprocessing/src/main/kotlin/de/itkl/textprocessing/Histogram.kt b/libraries/textprocessing/src/main/kotlin/de/itkl/textprocessing/Histogram.kt
index 340f225..3c1a3bf 100644
--- a/libraries/textprocessing/src/main/kotlin/de/itkl/textprocessing/Histogram.kt
+++ b/libraries/textprocessing/src/main/kotlin/de/itkl/textprocessing/Histogram.kt
@@ -2,30 +2,16 @@ package de.itkl.textprocessing
 
 import kotlinx.coroutines.flow.*
 
-class Histogram(private val histo: MutableMap = mutableMapOf()) : Iterable>{
-
+class Histogram(
+    private val histo: MutableMap = mutableMapOf()
+) : Iterable>{
     companion object {
-        suspend fun from(flow: Flow): Histogram {
-            return Histogram().apply {
-                flow.collect(this::add)
-            }
-        }
-
         fun fromBagOfWords(bagOfWords: BagOfWords): Histogram {
             val result = Histogram()
             bagOfWords.forEach(result::add)
             return result
         }
-
-        suspend fun fromBagOfWords(flow: Flow): Histogram {
-            val result = Histogram()
-            flow.collect() { value ->
-                value.forEach(result::add)
-            }
-            return result
-        }
-
         fun from(sequence: Sequence>): Histogram {
             val histo = sequence.associate { map -> map["word"]!! to map["count"]!!.toUInt() }
                 .toMutableMap()