From c8a247be8f78eb95f6669c53dd9bcd75a63c9765 Mon Sep 17 00:00:00 2001 From: Julien Lengrand-Lambert Date: Thu, 1 May 2025 10:47:55 +0200 Subject: [PATCH] Yolo vibe coding so much generated bullcrap --- README.md | 85 ++++++++++++++++++++++++++++++++++++++ build.gradle.kts | 4 +- src/main/kotlin/Example.kt | 58 ++++++++++++++++++++++++++ src/main/kotlin/Main.kt | 57 +++++++++++++++++++++++-- 4 files changed, 198 insertions(+), 6 deletions(-) create mode 100644 README.md create mode 100644 src/main/kotlin/Example.kt diff --git a/README.md b/README.md new file mode 100644 index 0000000..7a9106d --- /dev/null +++ b/README.md @@ -0,0 +1,85 @@ +# OpenGraphKt + +A simple Kotlin project demonstrating how to extract Open Graph tags from webpages using JSoup. + +## What is Open Graph? + +Open Graph is a protocol that enables any web page to become a rich object in a social graph. It was originally created by Facebook and is now widely used by many social media platforms and websites. + +Open Graph tags are meta tags with property attributes that start with "og:". They are used to define properties like title, image, description, etc. + +## How to Extract Open Graph Tags with JSoup + +This project demonstrates several ways to extract Open Graph tags from HTML using JSoup: + +### 1. Select all Open Graph tags + +```kotlin +val allOgTags = document.select("meta[property^=og:]") +allOgTags.forEach { tag -> + println("${tag.attr("property")}: ${tag.attr("content")}") +} +``` + +The CSS selector `meta[property^=og:]` selects all meta tags with a property attribute that starts with "og:". + +### 2. Select a specific Open Graph tag + +```kotlin +val ogTitle = document.select("meta[property=og:title]").attr("content") +println("og:title: $ogTitle") +``` + +### 3. 
Extract all Open Graph data into a map + +```kotlin +val ogData = document.select("meta[property^=og:]") + .associate { it.attr("property") to it.attr("content") } + +ogData.forEach { (property, content) -> + println("$property: $content") +} +``` + +### 4. Using a dedicated function + +```kotlin +fun extractOpenGraphTags(document: Document): OpenGraph { + // Select all meta tags with property attributes starting with "og:" + val ogTags = document.select("meta[property^=og:]") + + // Extract the basic required Open Graph properties + val title = ogTags.select("meta[property=og:title]").attr("content") + val image = ogTags.select("meta[property=og:image]").attr("content") + val description = ogTags.select("meta[property=og:description]").attr("content").takeIf { it.isNotEmpty() } + val url = ogTags.select("meta[property=og:url]").attr("content").takeIf { it.isNotEmpty() } + val type = ogTags.select("meta[property=og:type]").attr("content").takeIf { it.isNotEmpty() } + + return OpenGraph(title, image, description, url, type) +} +``` + +## Examples + +The project includes two examples: + +1. `Main.kt`: Connects to a real website (IMDB) and extracts Open Graph tags +2. 
`Example.kt`: Uses a local HTML string with Open Graph tags for demonstration + +## Running the Examples + +To run the Example.kt file: + +```bash +./gradlew run +``` + +To run the Main.kt file, update the `mainClass` in `build.gradle.kts` to "nl.lengrand.MainKt" and run: + +```bash +./gradlew run +``` + +## Dependencies + +- JSoup 1.20.1: A Java library for working with HTML \ No newline at end of file diff --git a/build.gradle.kts b/build.gradle.kts index f9b3237..f68ee5b 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -31,5 +31,5 @@ kotlin { } application { - mainClass = "nl.lengrand.MainKt" -} \ No newline at end of file + mainClass = "nl.lengrand.ExampleKt" +} diff --git a/src/main/kotlin/Example.kt b/src/main/kotlin/Example.kt new file mode 100644 index 0000000..7949783 --- /dev/null +++ b/src/main/kotlin/Example.kt @@ -0,0 +1,58 @@ +package nl.lengrand + +import org.jsoup.Jsoup +import org.jsoup.nodes.Document + +fun main() { + // Example HTML with Open Graph tags + val html = """ + + + + Open Graph Example + + + + + + + + +

Example Page

+ + + """.trimIndent() + + // Parse the HTML string into a Document + val doc = Jsoup.parse(html) + + // Demonstrate how to select all Open Graph tags + println("Example 1: Select all Open Graph tags") + val allOgTags = doc.select("meta[property^=og:]") + allOgTags.forEach { tag -> + println("${tag.attr("property")}: ${tag.attr("content")}") + } + + // Demonstrate how to select a specific Open Graph tag + println("\nExample 2: Select a specific Open Graph tag") + val ogTitle = doc.select("meta[property=og:title]").attr("content") + println("og:title: $ogTitle") + + // Demonstrate how to extract all Open Graph data into a map + println("\nExample 3: Extract all Open Graph data into a map") + val ogData = doc.select("meta[property^=og:]") + .associate { it.attr("property") to it.attr("content") } + + ogData.forEach { (property, content) -> + println("$property: $content") + } + + // Demonstrate using our extractOpenGraphTags function + println("\nExample 4: Using our extractOpenGraphTags function") + val openGraph = extractOpenGraphTags(doc) + println("Title: ${openGraph.title}") + println("Image: ${openGraph.image}") + println("Description: ${openGraph.description}") + println("URL: ${openGraph.url}") + println("Type: ${openGraph.type}") +} \ No newline at end of file diff --git a/src/main/kotlin/Main.kt b/src/main/kotlin/Main.kt index e984473..1cdf20a 100644 --- a/src/main/kotlin/Main.kt +++ b/src/main/kotlin/Main.kt @@ -1,11 +1,60 @@ package nl.lengrand import org.jsoup.Jsoup +import org.jsoup.nodes.Document -data class OpenGraph(val title: String, val image: String) +data class OpenGraph(val title: String, val image: String, val description: String? = null, val url: String? = null, val type: String? 
= null)
+
+/**
+ * Reads the Open Graph `<meta property="og:...">` tags of [document] into an [OpenGraph].
+ * `title` and `image` are the empty string when their tag is absent; the optional
+ * properties are `null` when their tag is absent or has empty content.
+ */
+fun extractOpenGraphTags(document: Document): OpenGraph {
+    val ogTags = document.select("meta[property^=og:]")
+    // First `content` attribute among the tags with exactly this property; jsoup returns "" if there is none.
+    fun content(property: String): String = ogTags.select("meta[property=$property]").attr("content")
+
+    return OpenGraph(
+        title = content("og:title"),
+        image = content("og:image"),
+        description = content("og:description").takeIf { it.isNotEmpty() },
+        url = content("og:url").takeIf { it.isNotEmpty() },
+        type = content("og:type").takeIf { it.isNotEmpty() },
+    )
+}
+
+/**
+ * Prints the number of Open Graph tags in [document], then one `property: content` line per tag.
+ */
+fun printAllOpenGraphTags(document: Document) {
+    val ogTags = document.select("meta[property^=og:]")
+    println("Found ${ogTags.size} Open Graph tags:")
+
+    ogTags.forEach { tag ->
+        println("${tag.attr("property")}: ${tag.attr("content")}")
+    }
+}
 
 fun main() {
-    val doc = Jsoup.connect("https://en.wikipedia.org/").get()
-    println(doc.title());
+    // Wikipedia doesn't have many Open Graph tags, so let's try a site that likely has them
+    val pageUrl = "https://www.imdb.com/title/tt0068646/" // The Godfather movie page
 
-}
\ No newline at end of file
+    // The request is the step that can fail (network error, HTTP error status), so it runs inside the try.
+    try {
+        val doc = Jsoup.connect(pageUrl).get()
+        println("Page title: ${doc.title()}")
+        printAllOpenGraphTags(doc)
+
+        // Extract Open Graph data into our data class
+        val ogData = extractOpenGraphTags(doc)
+        println("\nExtracted Open Graph data:")
+        println("Title: ${ogData.title}")
+        println("Image: ${ogData.image}")
+        println("Description: ${ogData.description}")
+        println("URL: ${ogData.url}")
+        println("Type: ${ogData.type}")
+    } catch (e: java.io.IOException) {
+        println("Error extracting Open Graph data: ${e.message}")
+    }
+}