starting with ms ocr client

1
Timo Bryant 2023-12-29 22:20:33 +01:00
parent 30dc3b658d
commit 9f3813a83a
13 changed files with 200 additions and 24 deletions

View File

@ -7,5 +7,5 @@ repositories {
}
dependencies {
implementation("org.jetbrains.kotlin:kotlin-gradle-plugin:1.8.20")
implementation("org.jetbrains.kotlin:kotlin-gradle-plugin:$embeddedKotlinVersion")
}

View File

@ -13,6 +13,11 @@ dependencies {
val koin_version = "3.5.3"
implementation("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.7.3")
implementation("io.insert-koin:koin-core:$koin_version")
implementation("org.jetbrains.kotlinx:kotlinx-datetime:0.5.0")
implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.6.2")
testImplementation("io.insert-koin:koin-test:$koin_version")
}
java {

View File

@ -1,6 +0,0 @@
package de.itkl.clients
/**
 * Placeholder OCR client.
 *
 * [ocr] has an empty body: it accepts no input and produces no result.
 */
class MsOcr {
    /** No-op stub; suspends nowhere and returns [Unit]. */
    suspend fun ocr() = Unit
}

View File

@ -0,0 +1,4 @@
package de.itkl.core_api.interfaces.data
/**
 * A table of string values with named columns.
 *
 * Iterating the table yields one `List<String>` per element — presumably one row
 * whose cells line up with [columns]; confirm against the implementations.
 */
interface DataTable : Iterable<List<String>> {
    /** Column names of this table. */
    val columns: List<String>
}

View File

@ -0,0 +1,15 @@
// Gradle build script for a library module that talks HTTP via Ktor
// (presumably the http-client module — confirm against the project layout).
plugins {
// Kotlin serialization compiler plugin, pinned to the Kotlin version embedded in Gradle.
kotlin("plugin.serialization") version embeddedKotlinVersion
}
// Resolved from a Gradle project property named `ktorVersion`.
val ktorVersion: String by project
dependencies {
// `api` dependencies are exposed to consumers of this module.
api(project(":libraries:core-api"))
api("io.ktor:ktor-client-core:$ktorVersion")
// NOTE(review): the `-jvm` artifact is declared in addition to `ktor-client-core`; confirm both are needed.
api("io.ktor:ktor-client-core-jvm:$ktorVersion")
// `implementation` dependencies stay internal to this module: CIO engine and JSON content negotiation.
implementation("io.ktor:ktor-client-cio:$ktorVersion")
implementation("io.ktor:ktor-client-content-negotiation:$ktorVersion")
implementation("io.ktor:ktor-serialization-kotlinx-json:$ktorVersion")
}

View File

@ -0,0 +1 @@
# Ktor version; build scripts read it through a `ktorVersion` project property.
ktorVersion=2.3.7

View File

@ -0,0 +1,30 @@
package de.itkl.httpClient.clients
import de.itkl.core_api.interfaces.Resource
import io.github.oshai.kotlinlogging.KotlinLogging
import io.ktor.client.*
import io.ktor.client.call.*
import io.ktor.client.request.*
import io.ktor.client.statement.*
import io.ktor.http.*
import org.koin.core.component.KoinComponent
import org.koin.core.component.inject
private val Log = KotlinLogging.logger { }
/**
 * Client for the Microsoft Read OCR `syncAnalyze` endpoint.
 *
 * The [HttpClient] is resolved lazily from Koin, so a Koin context providing one
 * must be started before [ocr] is called.
 *
 * @param endpoint URL of the synchronous analyze endpoint. Defaults to the address
 *   that was previously hard-coded, so `MsOcr()` behaves as before.
 */
class MsOcr(
    private val endpoint: String = DEFAULT_ENDPOINT,
) : KoinComponent {
    private val httpClient: HttpClient by inject()

    /**
     * Posts the content of [resource] to the OCR endpoint and decodes the reply.
     *
     * The request carries the query parameters `language=de` and `readingOrder=natural`
     * and uses the resource's own content type.
     *
     * @param resource the document or image to analyze.
     * @return the decoded OCR response body.
     */
    suspend fun ocr(resource: Resource): MsOcrResponse {
        val response = httpClient.post {
            url(endpoint)
            // `parameters { }` inside a request builder resolves to the stand-alone
            // io.ktor.http.parameters builder whose result is discarded, so nothing
            // reached the URL. `parameter` appends to the request's query string.
            parameter("language", "de")
            parameter("readingOrder", "natural")
            contentType(resource.contentType)
            setBody(resource.read())
        }
        // responseTime is a timestamp, not a duration; report the elapsed time instead.
        val elapsedMillis = response.responseTime.timestamp - response.requestTime.timestamp
        Log.info { "got response: ${response.status} in ${elapsedMillis}ms" }
        return response.body()
    }

    private companion object {
        const val DEFAULT_ENDPOINT = "http://10.54.150.152:5000/vision/v3.2/read/syncAnalyze"
    }
}

View File

@ -0,0 +1,81 @@
package de.itkl.httpClient.clients
import kotlinx.datetime.Instant
import kotlinx.datetime.LocalDateTime
import kotlinx.serialization.SerialName
import kotlinx.serialization.Serializable
/**
 * Serializable model of the JSON body returned by the OCR analyze call.
 *
 * Every property is bound to the JSON key named in its [SerialName] annotation.
 * The trailing `//` comments appear to be sample values taken from a captured response.
 */
@Serializable
data class MsOcrResponse(
@SerialName("analyzeResult")
val analyzeResult: AnalyzeResult,
@SerialName("createdDateTime")
val createdDateTime: Instant, // 2023-12-29T21:02:30Z
@SerialName("lastUpdatedDateTime")
val lastUpdatedDateTime: Instant, // 2023-12-29T21:02:31Z
@SerialName("status")
val status: String // succeeded
) {
/** Payload of the analysis: model/version identifiers plus one [ReadResult] per list entry. */
@Serializable
data class AnalyzeResult(
@SerialName("modelVersion")
val modelVersion: String, // 2022-04-30
@SerialName("readResults")
val readResults: List<ReadResult>,
@SerialName("version")
val version: String // 3.2.0
) {
/**
 * Recognition result carrying a page number, dimensions with their unit, and the recognized lines.
 *
 * NOTE(review): `angle`, `height` and `width` are declared as Int. If the service can
 * return fractional numbers for any of them, decoding would fail — confirm against the API schema.
 */
@Serializable
data class ReadResult(
@SerialName("angle")
val angle: Int, // 0
@SerialName("height")
val height: Int, // 3507
@SerialName("lines")
val lines: List<Line>,
@SerialName("page")
val page: Int, // 1
@SerialName("unit")
val unit: String, // pixel
@SerialName("width")
val width: Int // 2481
) {
/**
 * One recognized line: its full text, its words, an appearance descriptor and a bounding box.
 *
 * NOTE(review): `boundingBox` is a flat list of Int; presumably corner coordinates in the
 * unit reported by the enclosing result — confirm layout and whether values are always integral.
 */
@Serializable
data class Line(
@SerialName("appearance")
val appearance: Appearance,
@SerialName("boundingBox")
val boundingBox: List<Int>,
@SerialName("text")
val text: String, // Franz Mustermann
@SerialName("words")
val words: List<Word>
) {
/** Wrapper around the [Style] reported for a line. */
@Serializable
data class Appearance(
@SerialName("style")
val style: Style
) {
/** Style name together with a confidence value. */
@Serializable
data class Style(
@SerialName("confidence")
val confidence: Double, // 0.972
@SerialName("name")
val name: String // other
)
}
/** One recognized word with its text, a confidence value and a bounding box. */
@Serializable
data class Word(
@SerialName("boundingBox")
val boundingBox: List<Int>,
@SerialName("confidence")
val confidence: Double, // 0.998
@SerialName("text")
val text: String // Franz
)
}
}
}
}

View File

@ -0,0 +1,14 @@
package de.itkl.httpClient
import io.ktor.client.*
import io.ktor.client.engine.cio.*
import io.ktor.client.plugins.contentnegotiation.*
import io.ktor.serialization.kotlinx.json.*
/**
 * Creates an [HttpClient] backed by the CIO engine with the [ContentNegotiation]
 * plugin installed and JSON registered through `json()` with its default settings.
 */
fun createHttpClient(): HttpClient = HttpClient(CIO) {
    install(ContentNegotiation) { json() }
}

View File

@ -0,0 +1,10 @@
package de.itkl.httpClient
import de.itkl.httpClient.clients.MsOcr
import io.ktor.client.*
import org.koin.dsl.module
/**
 * Koin module for the HTTP layer: registers a singleton [HttpClient] built by
 * [createHttpClient] and a singleton [MsOcr] client.
 */
val httpClientModule = module {
    // The bound type is inferred from each factory's return type.
    single { createHttpClient() }
    single { MsOcr() }
}

View File

@ -0,0 +1,36 @@
package de.itkl.httpClient.clients
import de.itkl.core_api.coreApiModule
import de.itkl.core_api.implementation.FileResource
import de.itkl.core_api.interfaces.Resource
import de.itkl.httpClient.httpClientModule
import kotlinx.coroutines.runBlocking
import org.junit.Rule
import org.junit.jupiter.api.AfterEach
import org.junit.jupiter.api.BeforeEach
import org.junit.jupiter.api.Test
import org.koin.core.component.inject
import org.koin.core.context.startKoin
import org.koin.core.context.stopKoin
import org.koin.test.KoinTest
import java.nio.file.Paths
/**
 * Exercises [MsOcr] against the configured OCR endpoint using a sample image from
 * the repository's assets directory.
 *
 * The test only prints the response; it contains no assertions.
 */
class MsOcrTest : KoinTest {
    /** Starts a Koin context with the core and HTTP client modules before each test. */
    @BeforeEach
    fun start() {
        startKoin {
            printLogger()
            modules(coreApiModule, httpClientModule)
        }
    }

    /**
     * Stops the Koin context after each test. Without this, the next `startKoin`
     * call in [start] would fail because a context is already running.
     */
    @AfterEach
    fun stop() {
        stopKoin()
    }

    @Test
    fun `can create a request`() = runBlocking<Unit> {
        val msOcrClient: MsOcr by inject()
        val resource = FileResource(Paths.get("../../assets/xs-reg/00001.jpg").toAbsolutePath())
        val response = msOcrClient.ocr(resource)
        println(response)
    }
}

View File

@ -2,30 +2,16 @@ package de.itkl.textprocessing
import kotlinx.coroutines.flow.*
class Histogram(private val histo: MutableMap<String,UInt> = mutableMapOf()) : Iterable<Pair<String, UInt>>{
class Histogram(
private val histo: MutableMap<String,UInt> = mutableMapOf()
) : Iterable<Pair<String, UInt>>{
companion object {
suspend fun from(flow: Flow<String>): Histogram {
return Histogram().apply {
flow.collect(this::add)
}
}
fun fromBagOfWords(bagOfWords: BagOfWords): Histogram {
val result = Histogram()
bagOfWords.forEach(result::add)
return result
}
suspend fun fromBagOfWords(flow: Flow<BagOfWords>): Histogram {
val result = Histogram()
flow.collect() { value ->
value.forEach(result::add)
}
return result
}
fun from(sequence: Sequence<Map<String, String>>): Histogram {
val histo = sequence.associate { map -> map["word"]!! to map["count"]!!.toUInt() }
.toMutableMap()