starting with ms ocr client
parent
30dc3b658d
commit
9f3813a83a
|
|
@ -7,5 +7,5 @@ repositories {
|
||||||
}
|
}
|
||||||
|
|
||||||
dependencies {
|
dependencies {
|
||||||
implementation("org.jetbrains.kotlin:kotlin-gradle-plugin:1.8.20")
|
implementation("org.jetbrains.kotlin:kotlin-gradle-plugin:$embeddedKotlinVersion")
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,11 @@ dependencies {
|
||||||
val koin_version = "3.5.3"
|
val koin_version = "3.5.3"
|
||||||
implementation("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.7.3")
|
implementation("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.7.3")
|
||||||
implementation("io.insert-koin:koin-core:$koin_version")
|
implementation("io.insert-koin:koin-core:$koin_version")
|
||||||
|
implementation("org.jetbrains.kotlinx:kotlinx-datetime:0.5.0")
|
||||||
|
implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.6.2")
|
||||||
|
|
||||||
|
|
||||||
|
testImplementation("io.insert-koin:koin-test:$koin_version")
|
||||||
}
|
}
|
||||||
|
|
||||||
java {
|
java {
|
||||||
|
|
|
||||||
|
|
@ -1,6 +0,0 @@
|
||||||
package de.itkl.clients
|
|
||||||
|
|
||||||
class MsOcr {
|
|
||||||
|
|
||||||
suspend fun ocr() {}
|
|
||||||
}
|
|
||||||
|
|
@ -0,0 +1,4 @@
|
||||||
|
package de.itkl.core_api.interfaces.data
|
||||||
|
/**
 * A table of string cells that can be walked with a plain `for` loop.
 *
 * Every element yielded by the iterator is a `List<String>`; presumably one row
 * whose entries line up with [columns] — TODO confirm against the implementations.
 */
interface DataTable : Iterable<List<String>> {
    /** The columns of this table as strings (presumably the header names — confirm). */
    val columns: List<String>
}
|
||||||
|
|
@ -0,0 +1,15 @@
|
||||||
|
// Build script of the HTTP client module.
plugins {
    // Needed for the @Serializable response models; pinned to Gradle's embedded Kotlin version.
    kotlin("plugin.serialization") version embeddedKotlinVersion
}

// Read from the project properties (see `ktorVersion=...` in gradle.properties).
val ktorVersion: String by project

dependencies {
    // Exposed to consumers of this module.
    api(project(":libraries:core-api"))
    api("io.ktor:ktor-client-core:$ktorVersion")
    api("io.ktor:ktor-client-core-jvm:$ktorVersion")

    // Internal to this module: engine plus JSON content negotiation.
    implementation("io.ktor:ktor-client-cio:$ktorVersion")
    implementation("io.ktor:ktor-client-content-negotiation:$ktorVersion")
    implementation("io.ktor:ktor-serialization-kotlinx-json:$ktorVersion")
}
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
ktorVersion=2.3.7
|
||||||
|
|
@ -0,0 +1,30 @@
|
||||||
|
package de.itkl.httpClient.clients
|
||||||
|
|
||||||
|
import de.itkl.core_api.interfaces.Resource
|
||||||
|
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||||
|
import io.ktor.client.*
|
||||||
|
import io.ktor.client.call.*
|
||||||
|
import io.ktor.client.request.*
|
||||||
|
import io.ktor.client.statement.*
|
||||||
|
import io.ktor.http.*
|
||||||
|
import org.koin.core.component.KoinComponent
|
||||||
|
import org.koin.core.component.inject
|
||||||
|
|
||||||
|
private val Log = KotlinLogging.logger { }
|
||||||
|
/**
 * Client for the Microsoft Read OCR `syncAnalyze` endpoint.
 *
 * The [HttpClient] used for the call is resolved lazily from Koin.
 *
 * @param endpointUrl URL of the `syncAnalyze` operation. Defaults to the address that
 *   used to be hard-coded, so `MsOcr()` keeps targeting the same service.
 */
class MsOcr(
    private val endpointUrl: String = DEFAULT_ENDPOINT_URL,
) : KoinComponent {
    private val httpClient: HttpClient by inject()

    /**
     * Posts the content of [resource] to the OCR endpoint and decodes the response body
     * as [MsOcrResponse].
     *
     * The request is sent with the resource's content type and the query parameters
     * `language=de` and `readingOrder=natural`.
     *
     * @throws IllegalStateException if the service answers with a non-2xx status.
     */
    suspend fun ocr(resource: Resource): MsOcrResponse {
        val response = httpClient.post {
            url(endpointUrl)
            // `parameter` appends to the request URL's query string. A bare
            // `parameters { ... }` here resolves to the top-level io.ktor.http.parameters
            // factory, whose result would be discarded without touching the request.
            parameter("language", "de")
            parameter("readingOrder", "natural")
            contentType(resource.contentType)
            setBody(resource.read())
        }
        Log.info { "got response: ${response.status} in ${response.responseTime}" }
        // Fail with the status instead of an opaque deserialization error when the
        // service returns an error payload.
        check(response.status.isSuccess()) {
            "MS OCR request to $endpointUrl failed with status ${response.status}"
        }
        return response.body()
    }

    companion object {
        /** Endpoint used when no explicit URL is supplied. */
        const val DEFAULT_ENDPOINT_URL = "http://10.54.150.152:5000/vision/v3.2/read/syncAnalyze"
    }
}
|
||||||
|
|
@ -0,0 +1,81 @@
|
||||||
|
package de.itkl.httpClient.clients
|
||||||
|
|
||||||
|
|
||||||
|
import kotlinx.datetime.Instant
|
||||||
|
import kotlinx.datetime.LocalDateTime
|
||||||
|
import kotlinx.serialization.SerialName
|
||||||
|
import kotlinx.serialization.Serializable
|
||||||
|
|
||||||
|
/**
 * Response payload of the OCR `syncAnalyze` call as decoded by kotlinx.serialization.
 *
 * Sample values observed in one response are given in the property docs.
 * NOTE(review): the numeric fields below are modelled as [Int]; decoding will fail if the
 * service ever sends fractional values for them — confirm against the service documentation.
 */
@Serializable
data class MsOcrResponse(
    @SerialName("analyzeResult") val analyzeResult: AnalyzeResult,
    /** Sample: `2023-12-29T21:02:30Z`. */
    @SerialName("createdDateTime") val createdDateTime: Instant,
    /** Sample: `2023-12-29T21:02:31Z`. */
    @SerialName("lastUpdatedDateTime") val lastUpdatedDateTime: Instant,
    /** Sample: `succeeded`. */
    @SerialName("status") val status: String,
) {
    /** Container for the per-page read results plus version information. */
    @Serializable
    data class AnalyzeResult(
        /** Sample: `2022-04-30`. */
        @SerialName("modelVersion") val modelVersion: String,
        @SerialName("readResults") val readResults: List<ReadResult>,
        /** Sample: `3.2.0`. */
        @SerialName("version") val version: String,
    ) {
        /** One entry of `readResults`; carries a page number, dimensions and the recognised lines. */
        @Serializable
        data class ReadResult(
            /** Sample: `0`. */
            @SerialName("angle") val angle: Int,
            /** Sample: `3507`. */
            @SerialName("height") val height: Int,
            @SerialName("lines") val lines: List<Line>,
            /** Sample: `1`. */
            @SerialName("page") val page: Int,
            /** Sample: `pixel`. */
            @SerialName("unit") val unit: String,
            /** Sample: `2481`. */
            @SerialName("width") val width: Int,
        ) {
            /** A recognised line of text together with its words. */
            @Serializable
            data class Line(
                @SerialName("appearance") val appearance: Appearance,
                @SerialName("boundingBox") val boundingBox: List<Int>,
                /** Sample: `Franz Mustermann`. */
                @SerialName("text") val text: String,
                @SerialName("words") val words: List<Word>,
            ) {
                /** Wrapper around the [Style] reported for a line. */
                @Serializable
                data class Appearance(
                    @SerialName("style") val style: Style,
                ) {
                    /** Style name with its confidence value. */
                    @Serializable
                    data class Style(
                        /** Sample: `0.972`. */
                        @SerialName("confidence") val confidence: Double,
                        /** Sample: `other`. */
                        @SerialName("name") val name: String,
                    )
                }

                /** A single recognised word with its bounding box and confidence. */
                @Serializable
                data class Word(
                    @SerialName("boundingBox") val boundingBox: List<Int>,
                    /** Sample: `0.998`. */
                    @SerialName("confidence") val confidence: Double,
                    /** Sample: `Franz`. */
                    @SerialName("text") val text: String,
                )
            }
        }
    }
}
|
||||||
|
|
@ -0,0 +1,14 @@
|
||||||
|
package de.itkl.httpClient
|
||||||
|
|
||||||
|
import io.ktor.client.*
|
||||||
|
import io.ktor.client.engine.cio.*
|
||||||
|
import io.ktor.client.plugins.contentnegotiation.*
|
||||||
|
import io.ktor.serialization.kotlinx.json.*
|
||||||
|
|
||||||
|
/**
 * Creates an [HttpClient] on the CIO engine with JSON content negotiation installed.
 *
 * The JSON configuration starts from Ktor's [DefaultJson] and additionally ignores
 * unknown keys, so a response that carries fields the data classes do not model is
 * still decoded instead of failing with a serialization error.
 */
fun createHttpClient(): HttpClient = HttpClient(CIO) {
    install(ContentNegotiation) {
        // Fully qualified to avoid requiring a new file-level import.
        json(kotlinx.serialization.json.Json(DefaultJson) { ignoreUnknownKeys = true })
    }
}
|
||||||
|
|
@ -0,0 +1,10 @@
|
||||||
|
package de.itkl.httpClient
|
||||||
|
|
||||||
|
import de.itkl.httpClient.clients.MsOcr
|
||||||
|
import io.ktor.client.*
|
||||||
|
import org.koin.dsl.module
|
||||||
|
|
||||||
|
/**
 * Koin module of the HTTP client library: registers one shared [HttpClient]
 * (built by [createHttpClient]) and one shared [MsOcr] client.
 */
val httpClientModule = module {
    // The bound types are inferred from the factory lambdas' return types.
    single { createHttpClient() }
    single { MsOcr() }
}
|
||||||
|
|
@ -0,0 +1,36 @@
|
||||||
|
package de.itkl.httpClient.clients
|
||||||
|
|
||||||
|
import de.itkl.core_api.coreApiModule
|
||||||
|
import de.itkl.core_api.implementation.FileResource
|
||||||
|
import de.itkl.core_api.interfaces.Resource
|
||||||
|
import de.itkl.httpClient.httpClientModule
|
||||||
|
import kotlinx.coroutines.runBlocking
|
||||||
|
import org.junit.Rule
|
||||||
|
import org.junit.jupiter.api.BeforeEach
|
||||||
|
import org.junit.jupiter.api.Test
|
||||||
|
import org.koin.core.component.inject
|
||||||
|
import org.koin.core.context.startKoin
|
||||||
|
import org.koin.test.KoinTest
|
||||||
|
import java.nio.file.Paths
|
||||||
|
|
||||||
|
/**
 * Integration-style test for [MsOcr]: starts Koin with the core and HTTP client modules,
 * sends a sample image to the OCR client and prints the decoded response.
 */
class MsOcrTest : KoinTest {

    /** Starts a fresh global Koin context for every test. */
    @BeforeEach
    fun start() {
        startKoin {
            printLogger()
            modules(coreApiModule, httpClientModule)
        }
    }

    /**
     * Stops the global Koin context again. Without this, the next `startKoin` call
     * (a second test in this class, or another test class in the same JVM) fails
     * because a Koin application is already running.
     */
    @org.junit.jupiter.api.AfterEach
    fun stop() {
        org.koin.core.context.stopKoin()
    }

    // Explicit Unit return type keeps the JUnit test method `void` regardless of
    // what the last expression inside runBlocking evaluates to.
    @Test
    fun `can create a request`(): Unit = runBlocking {
        val msOcrClient: MsOcr by inject()
        val resource = FileResource(Paths.get("../../assets/xs-reg/00001.jpg").toAbsolutePath())
        val response = msOcrClient.ocr(resource)
        println(response)
    }
}
|
||||||
|
|
@ -2,30 +2,16 @@ package de.itkl.textprocessing
|
||||||
|
|
||||||
import kotlinx.coroutines.flow.*
|
import kotlinx.coroutines.flow.*
|
||||||
|
|
||||||
class Histogram(private val histo: MutableMap<String,UInt> = mutableMapOf()) : Iterable<Pair<String, UInt>>{
|
class Histogram(
|
||||||
|
private val histo: MutableMap<String,UInt> = mutableMapOf()
|
||||||
|
) : Iterable<Pair<String, UInt>>{
|
||||||
companion object {
|
companion object {
|
||||||
suspend fun from(flow: Flow<String>): Histogram {
|
|
||||||
return Histogram().apply {
|
|
||||||
flow.collect(this::add)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fun fromBagOfWords(bagOfWords: BagOfWords): Histogram {
|
fun fromBagOfWords(bagOfWords: BagOfWords): Histogram {
|
||||||
val result = Histogram()
|
val result = Histogram()
|
||||||
bagOfWords.forEach(result::add)
|
bagOfWords.forEach(result::add)
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
suspend fun fromBagOfWords(flow: Flow<BagOfWords>): Histogram {
|
|
||||||
val result = Histogram()
|
|
||||||
flow.collect() { value ->
|
|
||||||
value.forEach(result::add)
|
|
||||||
}
|
|
||||||
return result
|
|
||||||
}
|
|
||||||
|
|
||||||
fun from(sequence: Sequence<Map<String, String>>): Histogram {
|
fun from(sequence: Sequence<Map<String, String>>): Histogram {
|
||||||
val histo = sequence.associate { map -> map["word"]!! to map["count"]!!.toUInt() }
|
val histo = sequence.associate { map -> map["word"]!! to map["count"]!!.toUInt() }
|
||||||
.toMutableMap()
|
.toMutableMap()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue