Cleans up and structures code

This commit is contained in:
Julien Lengrand-Lambert
2025-05-01 11:19:16 +02:00
parent c8a247be8f
commit eab730cced
6 changed files with 118 additions and 191 deletions

21
LICENSE Normal file
View File

@@ -0,0 +1,21 @@
The MIT License
Copyright (c) 2009-2025 Jonathan Hedley <https://jsoup.org/>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -1,85 +1,16 @@
# OpenGraphKt # OpenGraphKt
A simple Kotlin project demonstrating how to extract Open Graph tags from webpages using JSoup. [OpenGraphKt](https://github.com/jlengrand/OpenGraphKt) is a minimalist Kotlin multiplatform library that extracts [Open Graph tags](https://ogp.me/) from HTML pages.
The input HTML can be an inline string, a file, or a remote URL. OpenGraphKt is a tiny wrapper on top of JSoup.
## What is Open Graph?
Open Graph is a protocol that enables any web page to become a rich object in a social graph. It was originally created by Facebook and is now widely used by many social media platforms and websites.
Open Graph tags are meta tags with property attributes that start with "og:". They are used to define properties like title, image, description, etc.
## How to Extract Open Graph Tags with JSoup
This project demonstrates several ways to extract Open Graph tags from HTML using JSoup:
### 1. Select all Open Graph tags
```kotlin
val allOgTags = document.select("meta[property^=og:]")
allOgTags.forEach { tag ->
println("${tag.attr("property")}: ${tag.attr("content")}")
}
```
The CSS selector `meta[property^=og:]` selects all meta tags with a property attribute that starts with "og:".
### 2. Select a specific Open Graph tag
```kotlin
val ogTitle = document.select("meta[property=og:title]").attr("content")
println("og:title: $ogTitle")
```
### 3. Extract all Open Graph data into a map
```kotlin
val ogData = document.select("meta[property^=og:]")
.associate { it.attr("property") to it.attr("content") }
ogData.forEach { (property, content) ->
println("$property: $content")
}
```
### 4. Using a dedicated function
```kotlin
fun extractOpenGraphTags(document: Document): OpenGraph {
    // Keep only the meta tags whose property attribute starts with "og:"
    val metaTags = document.select("meta[property^=og:]")

    // Content attribute of the og:<name> tag, looked up inside the tags kept above
    fun read(name: String): String =
        metaTags.select("meta[property=og:$name]").attr("content")

    // Same lookup, but an empty value is reported as null
    fun readOrNull(name: String): String? = read(name).takeIf { it.isNotEmpty() }

    return OpenGraph(
        read("title"),
        read("image"),
        readOrNull("description"),
        readOrNull("url"),
        readOrNull("type")
    )
}
```
## Examples
The project includes two examples:
1. `Main.kt`: Connects to a real website (IMDB) and extracts Open Graph tags
2. `Example.kt`: Uses a local HTML string with Open Graph tags for demonstration
## Running the Examples
To run the Example.kt file:
```bash
./gradlew run
```
To run the Main.kt file, update the `mainClass` in `build.gradle.kts` to "nl.lengrand.MainKt" and run:
```bash
./gradlew run
```
## Dependencies ## Dependencies
- JSoup 1.20.1: A Java library for working with HTML - [JSoup](https://jsoup.org/)
## Author
* [Julien Lengrand-Lambert](https://github.com/jlengrand)
## License
* [See License](./LICENSE)

View File

@@ -1,58 +0,0 @@
package nl.lengrand
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
fun main() {
    // Inline sample page carrying a handful of Open Graph meta tags
    val sampleHtml = """
        <!DOCTYPE html>
        <html>
        <head>
        <title>Open Graph Example</title>
        <meta property="og:title" content="The Rock" />
        <meta property="og:type" content="video.movie" />
        <meta property="og:url" content="https://example.com/the-rock" />
        <meta property="og:image" content="https://example.com/rock.jpg" />
        <meta property="og:description" content="An action movie about a rock" />
        <meta property="og:site_name" content="Example Movies" />
        </head>
        <body>
        <h1>Example Page</h1>
        </body>
        </html>
    """.trimIndent()

    // Turn the sample markup into a JSoup document
    val document = Jsoup.parse(sampleHtml)

    // 1) Every meta tag whose property starts with "og:"
    println("Example 1: Select all Open Graph tags")
    for (meta in document.select("meta[property^=og:]")) {
        val property = meta.attr("property")
        val content = meta.attr("content")
        println("$property: $content")
    }

    // 2) A single tag, looked up by its exact property name
    println("\nExample 2: Select a specific Open Graph tag")
    val titleContent = document.select("meta[property=og:title]").attr("content")
    println("og:title: $titleContent")

    // 3) All og: tags collected into a property -> content map
    println("\nExample 3: Extract all Open Graph data into a map")
    val contentByProperty = document.select("meta[property^=og:]")
        .associateBy(keySelector = { it.attr("property") }, valueTransform = { it.attr("content") })
    for ((property, content) in contentByProperty) {
        println("$property: $content")
    }

    // 4) The same data through the extractOpenGraphTags function
    println("\nExample 4: Using our extractOpenGraphTags function")
    val extracted = extractOpenGraphTags(document)
    listOf(
        "Title" to extracted.title,
        "Image" to extracted.image,
        "Description" to extracted.description,
        "URL" to extracted.url,
        "Type" to extracted.type
    ).forEach { (label, value) -> println("$label: $value") }
}

View File

@@ -1,60 +1,35 @@
package nl.lengrand package nl.lengrand.opengraphkt
import org.jsoup.Jsoup import nl.lengrand.opengraphkt.nl.lengrand.opengraphkt.DocumentFetcher
import org.jsoup.nodes.Document
data class OpenGraph(val title: String, val image: String, val description: String? = null, val url: String? = null, val type: String? = null) val html = """
<!DOCTYPE html>
/** <html>
* Extracts Open Graph tags from a JSoup Document <head>
* Open Graph tags are meta tags with property attributes starting with "og:" <title>Open Graph Example</title>
*/ <meta property="og:title" content="The Rock" />
fun extractOpenGraphTags(document: Document): OpenGraph { <meta property="og:type" content="video.movie" />
// Select all meta tags with property attributes starting with "og:" <meta property="og:url" content="https://example.com/the-rock" />
val ogTags = document.select("meta[property^=og:]") <meta property="og:image" content="https://example.com/rock.jpg" />
<meta property="og:description" content="An action movie about a rock" />
// Extract the basic required Open Graph properties <meta property="og:site_name" content="Example Movies" />
val title = ogTags.select("meta[property=og:title]").attr("content") </head>
val image = ogTags.select("meta[property=og:image]").attr("content") <body>
val description = ogTags.select("meta[property=og:description]").attr("content").takeIf { it.isNotEmpty() } <h1>Example Page</h1>
val url = ogTags.select("meta[property=og:url]").attr("content").takeIf { it.isNotEmpty() } </body>
val type = ogTags.select("meta[property=og:type]").attr("content").takeIf { it.isNotEmpty() } </html>
""".trimIndent()
return OpenGraph(title, image, description, url, type)
}
/**
* Prints all Open Graph tags found in a document
*/
fun printAllOpenGraphTags(document: Document) {
val ogTags = document.select("meta[property^=og:]")
println("Found ${ogTags.size} Open Graph tags:")
ogTags.forEach { tag ->
val property = tag.attr("property")
val content = tag.attr("content")
println("$property: $content")
}
}
fun main() { fun main() {
// Wikipedia doesn't have many Open Graph tags, so let's try a site that likely has them
val doc = Jsoup.connect("https://www.imdb.com/title/tt0068646/").get() // The Godfather movie page
println("Page title: ${doc.title()}")
// Print all Open Graph tags val fetcher = DocumentFetcher()
printAllOpenGraphTags(doc)
// Extract Open Graph data into our data class val docUrl = fetcher.fromUrl("https://www.imdb.com/title/tt0068646/")
try { val docString = fetcher.fromString(html)
val ogData = extractOpenGraphTags(doc)
println("\nExtracted Open Graph data:") val ogUrl = Parser().extractOpenGraphTags(docUrl)
println("Title: ${ogData.title}") println(ogUrl)
println("Image: ${ogData.image}") println("-------------")
println("Description: ${ogData.description}") val ogString = Parser().extractOpenGraphTags(docString)
println("URL: ${ogData.url}") println(ogString)
println("Type: ${ogData.type}")
} catch (e: Exception) {
println("Error extracting Open Graph data: ${e.message}")
}
} }

View File

@@ -0,0 +1,24 @@
package nl.lengrand.opengraphkt.nl.lengrand.opengraphkt
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
/**
 * Turns the supported kinds of input into a JSoup [Document], so that the Parser can then do its job.
 */
class DocumentFetcher {

    /** Connects to [url] with JSoup and returns the document obtained from a GET request. */
    fun fromUrl(url: String): Document = Jsoup.connect(url).get()

    /** Parses the given [html] string into a document with JSoup. */
    fun fromString(html: String): Document = Jsoup.parse(html)

    /** Not implemented yet: the body is Kotlin's `TODO()`, which throws when called. */
    fun fromFile(): Document = TODO()
}

View File

@@ -0,0 +1,34 @@
package nl.lengrand.opengraphkt
import org.jsoup.nodes.Document
/**
 * Open Graph values read from a page's `og:` meta tags.
 *
 * @property title content of the `og:title` tag
 * @property image content of the `og:image` tag
 * @property description content of the `og:description` tag; defaults to null
 * @property url content of the `og:url` tag; defaults to null
 * @property type content of the `og:type` tag; defaults to null
 */
data class OpenGraph(
    val title: String,
    val image: String,
    val description: String? = null,
    val url: String? = null,
    val type: String? = null,
)
/**
 * Reads Open Graph metadata out of an already-parsed JSoup [Document].
 */
class Parser {
    /**
     * Extracts Open Graph tags from a JSoup Document.
     * Open Graph tags are meta tags with property attributes starting with "og:".
     *
     * The function only reads from [document]; it does not print anything.
     *
     * @param document the parsed page to inspect
     * @return an [OpenGraph] built from the `og:title`, `og:image`, `og:description`,
     * `og:url` and `og:type` tags. Title and image are passed through as read;
     * description, url and type are null when their value is empty.
     */
    fun extractOpenGraphTags(document: Document): OpenGraph {
        // Narrow the document down to the meta tags whose property starts with "og:"
        val ogTags = document.select("meta[property^=og:]")

        // Content attribute of the tag with exactly this property, looked up in the narrowed set
        fun contentOf(property: String): String =
            ogTags.select("meta[property=$property]").attr("content")

        return OpenGraph(
            title = contentOf("og:title"),
            image = contentOf("og:image"),
            // Optional fields: an empty value is reported as null rather than ""
            description = contentOf("og:description").takeIf { it.isNotEmpty() },
            url = contentOf("og:url").takeIf { it.isNotEmpty() },
            type = contentOf("og:type").takeIf { it.isNotEmpty() },
        )
    }
}
}