starting with ms ocr client
parent
30dc3b658d
commit
9f3813a83a
|
|
@ -7,5 +7,5 @@ repositories {
|
|||
}
|
||||
|
||||
dependencies {
    // Track the Kotlin version embedded in the running Gradle distribution.
    // The same module was previously also declared with a hard-pinned 1.8.20,
    // which put two conflicting versions of one artifact on the classpath.
    implementation("org.jetbrains.kotlin:kotlin-gradle-plugin:$embeddedKotlinVersion")
}
|
||||
|
|
|
|||
|
|
@ -13,6 +13,11 @@ dependencies {
|
|||
val koin_version = "3.5.3"
|
||||
implementation("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.7.3")
|
||||
implementation("io.insert-koin:koin-core:$koin_version")
|
||||
implementation("org.jetbrains.kotlinx:kotlinx-datetime:0.5.0")
|
||||
implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.6.2")
|
||||
|
||||
|
||||
testImplementation("io.insert-koin:koin-test:$koin_version")
|
||||
}
|
||||
|
||||
java {
|
||||
|
|
|
|||
|
|
@ -1,6 +0,0 @@
|
|||
package de.itkl.clients
|
||||
|
||||
/** Placeholder OCR client; its only member, [ocr], has an empty body. */
class MsOcr {

    /** Suspending stub that takes no arguments, performs no work and returns [Unit]. */
    suspend fun ocr(): Unit = Unit
}
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
package de.itkl.core_api.interfaces.data
|
||||
/**
 * A table that is traversed through [Iterable]: every element produced by the
 * iterator is a `List<String>`.
 */
interface DataTable : Iterable<List<String>> {

    /**
     * The table's columns as a list of strings.
     *
     * NOTE(review): presumably the column names, in the same order as the
     * cells of each iterated row - confirm against the implementations.
     */
    val columns: List<String>
}
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
plugins {
    // Kotlin serialization compiler plugin, at the Kotlin version embedded in Gradle.
    kotlin("plugin.serialization") version embeddedKotlinVersion
}

// Read from the Gradle project property `ktorVersion`.
val ktorVersion: String by project

/** Maven coordinate of the Ktor artifact `ktor-<artifact>` at [ktorVersion]. */
fun ktor(artifact: String): String = "io.ktor:ktor-$artifact:$ktorVersion"

dependencies {
    api(project(":libraries:core-api"))

    // Part of this module's public API.
    api(ktor("client-core"))
    api(ktor("client-core-jvm"))

    // Used internally only.
    implementation(ktor("client-cio"))
    implementation(ktor("client-content-negotiation"))
    implementation(ktor("serialization-kotlinx-json"))
}
|
||||
|
|
@ -0,0 +1 @@
|
|||
ktorVersion=2.3.7
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
package de.itkl.httpClient.clients
|
||||
|
||||
import de.itkl.core_api.interfaces.Resource
|
||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||
import io.ktor.client.*
|
||||
import io.ktor.client.call.*
|
||||
import io.ktor.client.request.*
|
||||
import io.ktor.client.statement.*
|
||||
import io.ktor.http.*
|
||||
import org.koin.core.component.KoinComponent
|
||||
import org.koin.core.component.inject
|
||||
|
||||
private val Log = KotlinLogging.logger { }
|
||||
/**
 * Client for the OCR "read" operation reachable at [endpoint].
 *
 * The [HttpClient] is obtained from Koin on first use, so a Koin application
 * that provides one has to be running before [ocr] is called.
 *
 * @param endpoint URL of the synchronous analyze operation. The default is the
 *   address that used to be hard-coded, so `MsOcr()` behaves as before.
 */
class MsOcr(
    private val endpoint: String = "http://10.54.150.152:5000/vision/v3.2/read/syncAnalyze",
) : KoinComponent {
    private val httpClient: HttpClient by inject()

    /**
     * Posts the content of [resource] to [endpoint] and decodes the reply.
     *
     * @param resource supplies the request body (`read()`) and its content type.
     * @return the response body deserialized as [MsOcrResponse].
     * @throws IllegalStateException if the service answers with a non-success status.
     */
    suspend fun ocr(resource: Resource): MsOcrResponse {
        val response = httpClient.post {
            url(endpoint)
            // A bare `parameters { ... }` here resolves to io.ktor.http.parameters, which
            // only builds a detached Parameters object that is then thrown away.
            // `parameter` writes into this request's URL query string.
            parameter("language", "de")
            parameter("readingOrder", "natural")
            contentType(resource.contentType)
            setBody(resource.read())
        }
        Log.info { "got response: ${response.status} in ${response.responseTime}" }
        // Fail with the service's own message instead of an opaque deserialization error.
        check(response.status.isSuccess()) {
            "OCR request to $endpoint failed with ${response.status}: ${response.bodyAsText()}"
        }
        return response.body()
    }
}
|
||||
|
|
@ -0,0 +1,81 @@
|
|||
package de.itkl.httpClient.clients
|
||||
|
||||
|
||||
import kotlinx.datetime.Instant
|
||||
import kotlinx.datetime.LocalDateTime
|
||||
import kotlinx.serialization.SerialName
|
||||
import kotlinx.serialization.Serializable
|
||||
|
||||
/**
 * Top-level JSON document returned by the OCR read operation.
 *
 * Each property is bound to the JSON key of the same name and none has a
 * default, so every key must be present. The example values in the trailing
 * comments come from a sample response.
 *
 * NOTE(review): numeric fields are modelled as `Int` after that sample; verify
 * that the service never reports fractional values for them.
 */
@Serializable
data class MsOcrResponse(
    @SerialName("analyzeResult") val analyzeResult: AnalyzeResult,
    @SerialName("createdDateTime") val createdDateTime: Instant, // e.g. 2023-12-29T21:02:30Z
    @SerialName("lastUpdatedDateTime") val lastUpdatedDateTime: Instant, // e.g. 2023-12-29T21:02:31Z
    @SerialName("status") val status: String, // e.g. succeeded
) {
    /** Payload of the analysis: version information plus one [ReadResult] per list entry. */
    @Serializable
    data class AnalyzeResult(
        @SerialName("modelVersion") val modelVersion: String, // e.g. 2022-04-30
        @SerialName("readResults") val readResults: List<ReadResult>,
        @SerialName("version") val version: String, // e.g. 3.2.0
    ) {
        /** One entry of `readResults`, carrying page geometry and the recognised [lines]. */
        @Serializable
        data class ReadResult(
            @SerialName("angle") val angle: Int, // e.g. 0
            @SerialName("height") val height: Int, // e.g. 3507
            @SerialName("lines") val lines: List<Line>,
            @SerialName("page") val page: Int, // e.g. 1
            @SerialName("unit") val unit: String, // e.g. pixel
            @SerialName("width") val width: Int, // e.g. 2481
        ) {
            /** A recognised line: its [text], its [boundingBox] and the [words] it is made of. */
            @Serializable
            data class Line(
                @SerialName("appearance") val appearance: Appearance,
                @SerialName("boundingBox") val boundingBox: List<Int>,
                @SerialName("text") val text: String, // e.g. Franz Mustermann
                @SerialName("words") val words: List<Word>,
            ) {
                /** Wrapper object around the line's [style]. */
                @Serializable
                data class Appearance(
                    @SerialName("style") val style: Style,
                ) {
                    /** Style [name] together with the [confidence] reported for it. */
                    @Serializable
                    data class Style(
                        @SerialName("confidence") val confidence: Double, // e.g. 0.972
                        @SerialName("name") val name: String, // e.g. other
                    )
                }

                /** A single recognised word with its [boundingBox] and [confidence]. */
                @Serializable
                data class Word(
                    @SerialName("boundingBox") val boundingBox: List<Int>,
                    @SerialName("confidence") val confidence: Double, // e.g. 0.998
                    @SerialName("text") val text: String, // e.g. Franz
                )
            }
        }
    }
}
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
package de.itkl.httpClient
|
||||
|
||||
import io.ktor.client.*
|
||||
import io.ktor.client.engine.cio.*
|
||||
import io.ktor.client.plugins.contentnegotiation.*
|
||||
import io.ktor.serialization.kotlinx.json.*
|
||||
|
||||
/**
 * Creates an [HttpClient] on the CIO engine with JSON content negotiation.
 *
 * The JSON format is derived from Ktor's [DefaultJson] and additionally
 * ignores unknown keys, so a response carrying fields that the local model
 * classes do not declare still deserializes instead of failing.
 */
fun createHttpClient(): HttpClient =
    HttpClient(CIO) {
        install(ContentNegotiation) {
            json(
                // Fully qualified to avoid touching the file's import list.
                kotlinx.serialization.json.Json(from = DefaultJson) {
                    ignoreUnknownKeys = true
                },
            )
        }
    }
|
||||
|
|
@ -0,0 +1,10 @@
|
|||
package de.itkl.httpClient
|
||||
|
||||
import de.itkl.httpClient.clients.MsOcr
|
||||
import io.ktor.client.*
|
||||
import org.koin.dsl.module
|
||||
|
||||
/**
 * Koin module of this library: registers one shared [HttpClient], built by
 * [createHttpClient], and one shared [MsOcr] client.
 */
val httpClientModule = module {
    // The bound types are inferred from the factory lambdas' return types.
    single { createHttpClient() }
    single { MsOcr() }
}
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
package de.itkl.httpClient.clients
|
||||
|
||||
import de.itkl.core_api.coreApiModule
|
||||
import de.itkl.core_api.implementation.FileResource
|
||||
import de.itkl.core_api.interfaces.Resource
|
||||
import de.itkl.httpClient.httpClientModule
|
||||
import kotlinx.coroutines.runBlocking
|
||||
import org.junit.Rule
|
||||
import org.junit.jupiter.api.BeforeEach
|
||||
import org.junit.jupiter.api.Test
|
||||
import org.koin.core.component.inject
|
||||
import org.koin.core.context.startKoin
|
||||
import org.koin.test.KoinTest
|
||||
import java.nio.file.Paths
|
||||
|
||||
/**
 * Integration test for [MsOcr]. It sends a real request, so both the OCR
 * service and the sample image must be reachable from the executing machine.
 */
class MsOcrTest : KoinTest {

    /** Starts a Koin application with the modules the client needs. */
    @BeforeEach
    fun start() {
        startKoin {
            printLogger()
            modules(coreApiModule, httpClientModule)
        }
    }

    /**
     * Stops the Koin application started in [start]. `startKoin` registers a
     * global context; without this teardown the next test's [start] would run
     * while that context is still active.
     */
    @org.junit.jupiter.api.AfterEach
    fun stop() {
        org.koin.core.context.stopKoin()
    }

    /** Sends the sample image through the client and prints the decoded response. */
    @Test
    fun `can create a request`(): Unit = runBlocking {
        val msOcrClient: MsOcr by inject()
        val resource = FileResource(Paths.get("../../assets/xs-reg/00001.jpg").toAbsolutePath())
        val response = msOcrClient.ocr(resource)
        println(response)
    }
}
|
||||
|
|
@ -2,30 +2,16 @@ package de.itkl.textprocessing
|
|||
|
||||
import kotlinx.coroutines.flow.*
|
||||
|
||||
class Histogram(private val histo: MutableMap<String,UInt> = mutableMapOf()) : Iterable<Pair<String, UInt>>{
|
||||
|
||||
class Histogram(
|
||||
private val histo: MutableMap<String,UInt> = mutableMapOf()
|
||||
) : Iterable<Pair<String, UInt>>{
|
||||
companion object {
|
||||
/**
 * Collects [flow] and hands every emitted string to `add` on a freshly
 * created [Histogram], which is returned once collection completes.
 */
suspend fun from(flow: Flow<String>): Histogram {
    val histogram = Histogram()
    flow.collect(histogram::add)
    return histogram
}
|
||||
|
||||
/**
 * Creates a new [Histogram] and passes every element that `bagOfWords.forEach`
 * yields to its `add` method.
 */
fun fromBagOfWords(bagOfWords: BagOfWords): Histogram =
    Histogram().also { histogram -> bagOfWords.forEach(histogram::add) }
|
||||
|
||||
|
||||
/**
 * Collects [flow] and, for each emitted [BagOfWords], passes every element
 * that its `forEach` yields to `add` on one shared, newly created [Histogram].
 */
suspend fun fromBagOfWords(flow: Flow<BagOfWords>): Histogram =
    Histogram().also { histogram ->
        flow.collect { bag -> bag.forEach(histogram::add) }
    }
|
||||
|
||||
fun from(sequence: Sequence<Map<String, String>>): Histogram {
|
||||
val histo = sequence.associate { map -> map["word"]!! to map["count"]!!.toUInt() }
|
||||
.toMutableMap()
|
||||
|
|
|
|||
Loading…
Reference in New Issue