mirror of
https://github.com/jlengrand/OpenGraphKt.git
synced 2026-03-10 08:31:23 +00:00
Yolo vibe coding so much generated bullcrap
This commit is contained in:
85
README.md
Normal file
85
README.md
Normal file
@@ -0,0 +1,85 @@
|
||||
# OpenGraphKt
|
||||
|
||||
A simple Kotlin project demonstrating how to extract Open Graph tags from webpages using JSoup.
|
||||
|
||||
## What is Open Graph?
|
||||
|
||||
Open Graph is a protocol that enables any web page to become a rich object in a social graph. It was originally created by Facebook and is now used by many social media platforms and websites.
|
||||
|
||||
Open Graph tags are `<meta>` tags whose `property` attribute starts with "og:". Each tag's `content` attribute holds the value of one property, such as the page's title, image, or description.
|
||||
|
||||
## How to Extract Open Graph Tags with JSoup
|
||||
|
||||
This project demonstrates several ways to extract Open Graph tags from HTML using JSoup:
|
||||
|
||||
### 1. Select all Open Graph tags
|
||||
|
||||
```kotlin
|
||||
val allOgTags = document.select("meta[property^=og:]")
|
||||
allOgTags.forEach { tag ->
|
||||
println("${tag.attr("property")}: ${tag.attr("content")}")
|
||||
}
|
||||
```
|
||||
|
||||
The CSS selector `meta[property^=og:]` selects all meta tags with a property attribute that starts with "og:".
|
||||
|
||||
### 2. Select a specific Open Graph tag
|
||||
|
||||
```kotlin
|
||||
val ogTitle = document.select("meta[property=og:title]").attr("content")
|
||||
println("og:title: $ogTitle")
|
||||
```
|
||||
|
||||
### 3. Extract all Open Graph data into a map
|
||||
|
||||
```kotlin
|
||||
val ogData = document.select("meta[property^=og:]")
|
||||
.associate { it.attr("property") to it.attr("content") }
|
||||
|
||||
ogData.forEach { (property, content) ->
|
||||
println("$property: $content")
|
||||
}
|
||||
```
|
||||
|
||||
### 4. Using a dedicated function
|
||||
|
||||
```kotlin
|
||||
fun extractOpenGraphTags(document: Document): OpenGraph {
|
||||
// Select all meta tags with property attributes starting with "og:"
|
||||
val ogTags = document.select("meta[property^=og:]")
|
||||
|
||||
// Extract the basic required Open Graph properties
|
||||
val title = ogTags.select("meta[property=og:title]").attr("content")
|
||||
val image = ogTags.select("meta[property=og:image]").attr("content")
|
||||
val description = ogTags.select("meta[property=og:description]").attr("content").takeIf { it.isNotEmpty() }
|
||||
val url = ogTags.select("meta[property=og:url]").attr("content").takeIf { it.isNotEmpty() }
|
||||
val type = ogTags.select("meta[property=og:type]").attr("content").takeIf { it.isNotEmpty() }
|
||||
|
||||
return OpenGraph(title, image, description, url, type)
|
||||
}
|
||||
```
|
||||
|
||||
## Examples
|
||||
|
||||
The project includes two examples:
|
||||
|
||||
1. `Main.kt`: Connects to a real website (IMDB) and extracts Open Graph tags
|
||||
2. `Example.kt`: Uses a local HTML string with Open Graph tags for demonstration
|
||||
|
||||
## Running the Examples
|
||||
|
||||
To run the `Example.kt` file (the `mainClass` configured by default in `build.gradle.kts`):
|
||||
|
||||
```bash
|
||||
./gradlew run
|
||||
```
|
||||
|
||||
To run the `Main.kt` file instead, change the `mainClass` in `build.gradle.kts` to `"nl.lengrand.MainKt"` and run:
|
||||
|
||||
```bash
|
||||
./gradlew run
|
||||
```
|
||||
|
||||
## Dependencies
|
||||
|
||||
- JSoup 1.20.1: A Java library for working with HTML
|
||||
@@ -31,5 +31,5 @@ kotlin {
|
||||
}
|
||||
|
||||
// Entry point launched by `./gradlew run`.
// Set this to "nl.lengrand.MainKt" to run the live-website example instead.
application {
    mainClass = "nl.lengrand.ExampleKt"
}
|
||||
|
||||
58
src/main/kotlin/Example.kt
Normal file
58
src/main/kotlin/Example.kt
Normal file
@@ -0,0 +1,58 @@
|
||||
package nl.lengrand
|
||||
|
||||
import org.jsoup.Jsoup
|
||||
import org.jsoup.nodes.Document
|
||||
|
||||
/**
 * Demonstrates four ways of reading Open Graph tags with JSoup.
 *
 * The page is parsed from an in-memory HTML string, and the outcome of each
 * approach is printed to standard output.
 */
fun main() {
    // Sample page carrying a handful of og: meta tags.
    // All lines share one indent level so that trimIndent() leaves them flush left.
    val sampleHtml = """
        <!DOCTYPE html>
        <html>
        <head>
        <title>Open Graph Example</title>
        <meta property="og:title" content="The Rock" />
        <meta property="og:type" content="video.movie" />
        <meta property="og:url" content="https://example.com/the-rock" />
        <meta property="og:image" content="https://example.com/rock.jpg" />
        <meta property="og:description" content="An action movie about a rock" />
        <meta property="og:site_name" content="Example Movies" />
        </head>
        <body>
        <h1>Example Page</h1>
        </body>
        </html>
    """.trimIndent()

    // Turn the HTML text into a JSoup Document
    val page = Jsoup.parse(sampleHtml)

    // 1. Every meta tag whose property attribute starts with "og:"
    println("Example 1: Select all Open Graph tags")
    for (meta in page.select("meta[property^=og:]")) {
        println("${meta.attr("property")}: ${meta.attr("content")}")
    }

    // 2. A single, specific property
    println("\nExample 2: Select a specific Open Graph tag")
    val titleContent = page.select("meta[property=og:title]").attr("content")
    println("og:title: $titleContent")

    // 3. All og: tags collected into a property -> content map
    println("\nExample 3: Extract all Open Graph data into a map")
    val contentByProperty = page.select("meta[property^=og:]")
        .associateBy({ it.attr("property") }, { it.attr("content") })
    for ((property, content) in contentByProperty) {
        println("$property: $content")
    }

    // 4. The reusable helper that fills the OpenGraph data class
    println("\nExample 4: Using our extractOpenGraphTags function")
    val extracted = extractOpenGraphTags(page)
    println("Title: ${extracted.title}")
    println("Image: ${extracted.image}")
    println("Description: ${extracted.description}")
    println("URL: ${extracted.url}")
    println("Type: ${extracted.type}")
}
|
||||
@@ -1,11 +1,60 @@
|
||||
package nl.lengrand
|
||||
|
||||
import org.jsoup.Jsoup
|
||||
import org.jsoup.nodes.Document
|
||||
|
||||
/**
 * Open Graph metadata read from a page, as populated by `extractOpenGraphTags`.
 *
 * Only [title] and [image] must be supplied; the remaining properties default
 * to `null`, so existing two-argument constructor calls keep working.
 *
 * @property title content of the `og:title` tag
 * @property image content of the `og:image` tag
 * @property description content of the `og:description` tag, or `null` when not available
 * @property url content of the `og:url` tag, or `null` when not available
 * @property type content of the `og:type` tag, or `null` when not available
 */
data class OpenGraph(
    val title: String,
    val image: String,
    val description: String? = null,
    val url: String? = null,
    val type: String? = null
)
|
||||
|
||||
/**
 * Builds an [OpenGraph] value from the `og:` meta tags of a JSoup [Document].
 *
 * Title and image are taken as-is from the `content` attribute of the matching
 * tag. Description, URL and type are reported as `null` when the extracted
 * value is empty.
 *
 * @param document the parsed page to inspect
 * @return the Open Graph data found in [document]
 */
fun extractOpenGraphTags(document: Document): OpenGraph {
    // Narrow the search once to meta tags whose property starts with "og:"
    val ogMetaElements = document.select("meta[property^=og:]")

    // Content attribute of the og: tags matched by the given selector
    fun contentFor(selector: String): String =
        ogMetaElements.select(selector).attr("content")

    // Same lookup, with an empty value mapped to null
    fun optionalContentFor(selector: String): String? =
        contentFor(selector).takeIf { it.isNotEmpty() }

    return OpenGraph(
        contentFor("meta[property=og:title]"),
        contentFor("meta[property=og:image]"),
        optionalContentFor("meta[property=og:description]"),
        optionalContentFor("meta[property=og:url]"),
        optionalContentFor("meta[property=og:type]")
    )
}
|
||||
|
||||
/**
 * Writes every `og:` meta tag of [document] to standard output.
 *
 * A header line with the number of tags is printed first, followed by one
 * `property: content` line per tag.
 *
 * @param document the parsed page to inspect
 */
fun printAllOpenGraphTags(document: Document) {
    val ogMetaElements = document.select("meta[property^=og:]")
    println("Found ${ogMetaElements.size} Open Graph tags:")

    for (meta in ogMetaElements) {
        println("${meta.attr("property")}: ${meta.attr("content")}")
    }
}
|
||||
|
||||
/**
 * Fetches a live web page and prints the Open Graph tags it declares.
 *
 * The page title is printed first, then every `og:` tag, then the values
 * extracted into the [OpenGraph] data class. Only the extraction step is
 * wrapped in try/catch; a failure while fetching the page is not caught here.
 */
fun main() {
    // A page that is likely to carry Open Graph tags (Wikipedia doesn't have many)
    val doc = Jsoup.connect("https://www.imdb.com/title/tt0068646/").get() // The Godfather movie page
    println("Page title: ${doc.title()}")

    // Print all Open Graph tags
    printAllOpenGraphTags(doc)

    // Extract Open Graph data into our data class
    try {
        val ogData = extractOpenGraphTags(doc)
        println("\nExtracted Open Graph data:")
        println("Title: ${ogData.title}")
        println("Image: ${ogData.image}")
        println("Description: ${ogData.description}")
        println("URL: ${ogData.url}")
        println("Type: ${ogData.type}")
    } catch (e: Exception) {
        println("Error extracting Open Graph data: ${e.message}")
    }
}
|
||||
|
||||
Reference in New Issue
Block a user