From 9cb783d2b7b1a5444adf26897204b8cf8267c9b9 Mon Sep 17 00:00:00 2001 From: Julien Lengrand-Lambert Date: Thu, 1 May 2025 11:32:57 +0200 Subject: [PATCH] Starts the jam --- src/main/kotlin/Main.kt | 1 - .../lengrand/opengraphkt/DocumentFetcher.kt | 5 +- .../kotlin/nl/lengrand/opengraphkt/Parser.kt | 47 ++++++++++++++----- 3 files changed, 37 insertions(+), 16 deletions(-) diff --git a/src/main/kotlin/Main.kt b/src/main/kotlin/Main.kt index 4d2f71b..f9697b1 100644 --- a/src/main/kotlin/Main.kt +++ b/src/main/kotlin/Main.kt @@ -21,7 +21,6 @@ val html = """ """.trimIndent() fun main() { - val fetcher = DocumentFetcher() val docUrl = fetcher.fromUrl("https://www.imdb.com/title/tt0068646/") diff --git a/src/main/kotlin/nl/lengrand/opengraphkt/DocumentFetcher.kt b/src/main/kotlin/nl/lengrand/opengraphkt/DocumentFetcher.kt index c4c0650..8fd9b4a 100644 --- a/src/main/kotlin/nl/lengrand/opengraphkt/DocumentFetcher.kt +++ b/src/main/kotlin/nl/lengrand/opengraphkt/DocumentFetcher.kt @@ -3,9 +3,8 @@ package nl.lengrand.opengraphkt.nl.lengrand.opengraphkt import org.jsoup.Jsoup import org.jsoup.nodes.Document - -/* -DocumentFetcher's job is to take any type of input and transform it into a JSoup document for the Parser to then do its job +/** + * DocumentFetcher's job is to take any type of input and transform it into a JSoup document for the Parser to then do its job */ class DocumentFetcher { diff --git a/src/main/kotlin/nl/lengrand/opengraphkt/Parser.kt b/src/main/kotlin/nl/lengrand/opengraphkt/Parser.kt index c4cbce7..13008bd 100644 --- a/src/main/kotlin/nl/lengrand/opengraphkt/Parser.kt +++ b/src/main/kotlin/nl/lengrand/opengraphkt/Parser.kt @@ -1,13 +1,24 @@ package nl.lengrand.opengraphkt import org.jsoup.nodes.Document +import org.jsoup.select.Elements + +data class OpenGraphTag( + val property: String, + val content: String, +) data class OpenGraph( - val title: String, - val image: String, - val description: String? = null, + // Tags can have multiple values for the same property, so we cannot use a Map. + val rawTags: Elements, + val tags: List, + + val title: String? = null, + val type: String? = null, + val image: String? = null, // Do we just take the first here? There might be several val url: String? = null, - val type: String? = null + + // TODO : Continue with more ) class Parser { @@ -17,18 +28,30 @@ class Parser { * Open Graph tags are meta tags with property attributes starting with "og:" */ fun extractOpenGraphTags(document: Document): OpenGraph { - val ogTags = document.select("meta[property^=og:]") + val tags = document.select("meta[property^=og:]") + val cleanTags = tags.map { + OpenGraphTag(it.attr("property") + .drop(3), // Is that completely safe? + it.attr("content") + ) + } - println(ogTags) + println(tags) + println(cleanTags) // Extract the basic required Open Graph properties - val title = ogTags.select("meta[property=og:title]").attr("content") - val image = ogTags.select("meta[property=og:image]").attr("content") - val description = ogTags.select("meta[property=og:description]").attr("content").takeIf { it.isNotEmpty() } - val url = ogTags.select("meta[property=og:url]").attr("content").takeIf { it.isNotEmpty() } - val type = ogTags.select("meta[property=og:type]").attr("content").takeIf { it.isNotEmpty() } + val title = tags.select("meta[property=og:title]").attr("content") + val image = tags.select("meta[property=og:image]").attr("content") + val url = tags.select("meta[property=og:url]").attr("content") + val type = tags.select("meta[property=og:type]").attr("content") - return OpenGraph(title, image, description, url, type) + return OpenGraph( + tags, + cleanTags, + title, + type, + image, + url ) } } \ No newline at end of file