Starts the jam

This commit is contained in:
Julien Lengrand-Lambert
2025-05-01 11:32:57 +02:00
parent eab730cced
commit 9cb783d2b7
3 changed files with 37 additions and 16 deletions

View File

@@ -21,7 +21,6 @@ val html = """
""".trimIndent() """.trimIndent()
fun main() { fun main() {
val fetcher = DocumentFetcher() val fetcher = DocumentFetcher()
val docUrl = fetcher.fromUrl("https://www.imdb.com/title/tt0068646/") val docUrl = fetcher.fromUrl("https://www.imdb.com/title/tt0068646/")

View File

@@ -3,9 +3,8 @@ package nl.lengrand.opengraphkt.nl.lengrand.opengraphkt
import org.jsoup.Jsoup import org.jsoup.Jsoup
import org.jsoup.nodes.Document import org.jsoup.nodes.Document
/**
/* * DocumentFetcher's job is to take any type of input and transform it into a JSoup document for the Parser to then do its job
DocumentFetcher's job is to take any type of input and transform it into a JSoup document for the Parser to then do its job
*/ */
class DocumentFetcher { class DocumentFetcher {

View File

@@ -1,13 +1,24 @@
package nl.lengrand.opengraphkt package nl.lengrand.opengraphkt
import org.jsoup.nodes.Document import org.jsoup.nodes.Document
import org.jsoup.select.Elements
/**
 * A single Open Graph meta tag, held as a property/content pair.
 *
 * Kept as a standalone value rather than a map entry because the same
 * property may appear several times in one document.
 *
 * @property property the tag's property name
 * @property content the value of the tag's `content` attribute
 */
data class OpenGraphTag(
    val property: String,
    val content: String,
)
data class OpenGraph( data class OpenGraph(
val title: String, // Tags can have multiple values for the same property, so we cannot use a Map.
val image: String, val rawTags: Elements,
val description: String? = null, val tags: List<OpenGraphTag>,
val title: String? = null,
val type: String? = null,
val image: String? = null, // Do we just take the first here? There might be several
val url: String? = null, val url: String? = null,
val type: String? = null
// TODO : Continue with more
) )
class Parser { class Parser {
@@ -17,18 +28,30 @@ class Parser {
* Open Graph tags are meta tags with property attributes starting with "og:" * Open Graph tags are meta tags with property attributes starting with "og:"
*/ */
fun extractOpenGraphTags(document: Document): OpenGraph { fun extractOpenGraphTags(document: Document): OpenGraph {
val ogTags = document.select("meta[property^=og:]") val tags = document.select("meta[property^=og:]")
val cleanTags = tags.map {
OpenGraphTag(it.attr("property")
.drop(3), // Is that completely safe?
it.attr("content")
)
}
println(ogTags) println(tags)
println(cleanTags)
// Extract the basic required Open Graph properties // Extract the basic required Open Graph properties
val title = ogTags.select("meta[property=og:title]").attr("content") val title = tags.select("meta[property=og:title]").attr("content")
val image = ogTags.select("meta[property=og:image]").attr("content") val image = tags.select("meta[property=og:image]").attr("content")
val description = ogTags.select("meta[property=og:description]").attr("content").takeIf { it.isNotEmpty() } val url = tags.select("meta[property=og:url]").attr("content")
val url = ogTags.select("meta[property=og:url]").attr("content").takeIf { it.isNotEmpty() } val type = tags.select("meta[property=og:type]").attr("content")
val type = ogTags.select("meta[property=og:type]").attr("content").takeIf { it.isNotEmpty() }
return OpenGraph(title, image, description, url, type) return OpenGraph(
tags,
cleanTags,
title,
type,
image,
url )
} }
} }