From 5372fab21c3293ecf9a2419dbbf71bd945f1c858 Mon Sep 17 00:00:00 2001 From: julien Lengrand-Lambert Date: Tue, 3 Jun 2025 00:36:03 +0200 Subject: [PATCH] Fix types (#22) * Improves types * Adds missing properties to music album * Changes gender from String to Enum * Changes URL to an actual URL * Fix typo * Adds scalable live testing on real data * Uses OffsetDateTime for articles, videos and books --- .idea/gradle.xml | 1 + .../kotlin/fr/lengrand/opengraphkt/Models.kt | 66 +- .../kotlin/fr/lengrand/opengraphkt/Parser.kt | 73 +- .../fr/lengrand/opengraphkt/ParserTest.kt | 319 +++- scrape-test/.gitignore | 1 + scrape-test/README.md | 20 + scrape-test/build.gradle.kts | 28 + scrape-test/data/top500.csv | 501 ++++++ scrape-test/data/website_classification.csv | 1409 +++++++++++++++++ .../kotlin/fr/lengrand/scrape/ParserTest.kt | 42 + .../main/kotlin/fr/lengrand/scrape/Scraper.kt | 146 ++ settings.gradle.kts | 3 +- 12 files changed, 2553 insertions(+), 56 deletions(-) create mode 100644 scrape-test/.gitignore create mode 100644 scrape-test/README.md create mode 100644 scrape-test/build.gradle.kts create mode 100644 scrape-test/data/top500.csv create mode 100644 scrape-test/data/website_classification.csv create mode 100644 scrape-test/src/main/kotlin/fr/lengrand/scrape/ParserTest.kt create mode 100644 scrape-test/src/main/kotlin/fr/lengrand/scrape/Scraper.kt diff --git a/.idea/gradle.xml b/.idea/gradle.xml index a537b7e..1eb4686 100644 --- a/.idea/gradle.xml +++ b/.idea/gradle.xml @@ -12,6 +12,7 @@ diff --git a/opengraphkt/src/main/kotlin/fr/lengrand/opengraphkt/Models.kt b/opengraphkt/src/main/kotlin/fr/lengrand/opengraphkt/Models.kt index 7bf2880..d8ff45e 100644 --- a/opengraphkt/src/main/kotlin/fr/lengrand/opengraphkt/Models.kt +++ b/opengraphkt/src/main/kotlin/fr/lengrand/opengraphkt/Models.kt @@ -1,5 +1,8 @@ package fr.lengrand.opengraphkt +import java.net.URL +import java.time.OffsetDateTime + /** * Enum representing the different types of Open Graph objects. */ @@ -46,6 +49,21 @@ enum class Type { } } +enum class Gender { + MALE, + FEMALE; + + companion object { + fun fromString(gender: String): Gender { + return valueOf(gender.uppercase()) + } + } + + override fun toString(): String { + return this.name.lowercase() + } +} + data class Tag( val property: String, val content: String, @@ -60,15 +78,15 @@ data class Data( // Basic metadata val title: String?, val type: String?, - val url: String?, + val url: URL?, val description: String?, + // Other metadata val siteName: String?, val determiner: String?, - val locale: String?, + val locale: String?, val localeAlternate: List, - // Structured properties val images: List, val videos: List