mirror of
https://github.com/jlengrand/OpenGraphKt.git
synced 2026-03-10 00:21:19 +00:00
Cleans up and structures code
This commit is contained in:
21
LICENSE
Normal file
21
LICENSE
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
The MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2009-2025 Jonathan Hedley <https://jsoup.org/>
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
91
README.md
91
README.md
@@ -1,85 +1,16 @@
|
|||||||
# OpenGraphKt
|
# OpenGraphKt
|
||||||
|
|
||||||
A simple Kotlin project demonstrating how to extract Open Graph tags from webpages using JSoup.
|
[OpenGraphKt](https://github.com/jlengrand/OpenGraphKt) is a minimalist Kotlin multiplatform library that extracts [Open Graph tags](https://ogp.me/) from HTML pages.
|
||||||
|
The input HTML can be an inlined string, a file, or a remote URL. OpenGraphKt is a tiny wrapper on top of JSoup.
|
||||||
## What is Open Graph?
|
|
||||||
|
|
||||||
Open Graph is a protocol that enables any web page to become a rich object in a social graph. It was originally created by Facebook and is now widely used by many social media platforms and websites.
|
|
||||||
|
|
||||||
Open Graph tags are meta tags with property attributes that start with "og:". They are used to define properties like title, image, description, etc.
|
|
||||||
|
|
||||||
## How to Extract Open Graph Tags with JSoup
|
|
||||||
|
|
||||||
This project demonstrates several ways to extract Open Graph tags from HTML using JSoup:
|
|
||||||
|
|
||||||
### 1. Select all Open Graph tags
|
|
||||||
|
|
||||||
```kotlin
|
|
||||||
val allOgTags = document.select("meta[property^=og:]")
|
|
||||||
allOgTags.forEach { tag ->
|
|
||||||
println("${tag.attr("property")}: ${tag.attr("content")}")
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
The CSS selector `meta[property^=og:]` selects all meta tags with a property attribute that starts with "og:".
|
|
||||||
|
|
||||||
### 2. Select a specific Open Graph tag
|
|
||||||
|
|
||||||
```kotlin
|
|
||||||
val ogTitle = document.select("meta[property=og:title]").attr("content")
|
|
||||||
println("og:title: $ogTitle")
|
|
||||||
```
|
|
||||||
|
|
||||||
### 3. Extract all Open Graph data into a map
|
|
||||||
|
|
||||||
```kotlin
|
|
||||||
val ogData = document.select("meta[property^=og:]")
|
|
||||||
.associate { it.attr("property") to it.attr("content") }
|
|
||||||
|
|
||||||
ogData.forEach { (property, content) ->
|
|
||||||
println("$property: $content")
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### 4. Using a dedicated function
|
|
||||||
|
|
||||||
```kotlin
|
|
||||||
fun extractOpenGraphTags(document: Document): OpenGraph {
|
|
||||||
// Select all meta tags with property attributes starting with "og:"
|
|
||||||
val ogTags = document.select("meta[property^=og:]")
|
|
||||||
|
|
||||||
// Extract the basic required Open Graph properties
|
|
||||||
val title = ogTags.select("meta[property=og:title]").attr("content")
|
|
||||||
val image = ogTags.select("meta[property=og:image]").attr("content")
|
|
||||||
val description = ogTags.select("meta[property=og:description]").attr("content").takeIf { it.isNotEmpty() }
|
|
||||||
val url = ogTags.select("meta[property=og:url]").attr("content").takeIf { it.isNotEmpty() }
|
|
||||||
val type = ogTags.select("meta[property=og:type]").attr("content").takeIf { it.isNotEmpty() }
|
|
||||||
|
|
||||||
return OpenGraph(title, image, description, url, type)
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
## Examples
|
|
||||||
|
|
||||||
The project includes two examples:
|
|
||||||
|
|
||||||
1. `Main.kt`: Connects to a real website (IMDB) and extracts Open Graph tags
|
|
||||||
2. `Example.kt`: Uses a local HTML string with Open Graph tags for demonstration
|
|
||||||
|
|
||||||
## Running the Examples
|
|
||||||
|
|
||||||
To run the Example.kt file:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
./gradlew run
|
|
||||||
```
|
|
||||||
|
|
||||||
To run the Main.kt file, update the `mainClass` in `build.gradle.kts` to "nl.lengrand.MainKt" and run:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
./gradlew run
|
|
||||||
```
|
|
||||||
|
|
||||||
## Dependencies
|
## Dependencies
|
||||||
|
|
||||||
- JSoup 1.20.1: A Java library for working with HTML
|
- [JSoup](https://jsoup.org/)
|
||||||
|
|
||||||
|
## Author
|
||||||
|
|
||||||
|
* [Julien Lengrand-Lambert](https://github.com/jlengrand)
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
* [See License](./LICENSE)
|
||||||
@@ -1,58 +0,0 @@
|
|||||||
package nl.lengrand
|
|
||||||
|
|
||||||
import org.jsoup.Jsoup
|
|
||||||
import org.jsoup.nodes.Document
|
|
||||||
|
|
||||||
/**
 * Walks through four ways of reading Open Graph tags from an in-memory HTML page:
 * listing every tag, reading a single tag, collecting the tags into a map, and
 * calling extractOpenGraphTags.
 */
fun main() {
    // Sample page carrying a handful of Open Graph meta tags
    val samplePage = """
        <!DOCTYPE html>
        <html>
        <head>
            <title>Open Graph Example</title>
            <meta property="og:title" content="The Rock" />
            <meta property="og:type" content="video.movie" />
            <meta property="og:url" content="https://example.com/the-rock" />
            <meta property="og:image" content="https://example.com/rock.jpg" />
            <meta property="og:description" content="An action movie about a rock" />
            <meta property="og:site_name" content="Example Movies" />
        </head>
        <body>
            <h1>Example Page</h1>
        </body>
        </html>
    """.trimIndent()

    // Build the JSoup document every example below works on
    val parsed = Jsoup.parse(samplePage)

    // 1. Every meta tag whose property starts with "og:"
    println("Example 1: Select all Open Graph tags")
    for (meta in parsed.select("meta[property^=og:]")) {
        val name = meta.attr("property")
        val value = meta.attr("content")
        println("$name: $value")
    }

    // 2. One specific tag
    println("\nExample 2: Select a specific Open Graph tag")
    val titleTags = parsed.select("meta[property=og:title]")
    println("og:title: ${titleTags.attr("content")}")

    // 3. All tags as a property -> content map
    println("\nExample 3: Extract all Open Graph data into a map")
    val tagsByProperty = parsed
        .select("meta[property^=og:]")
        .associateBy({ it.attr("property") }, { it.attr("content") })
    for ((property, content) in tagsByProperty) {
        println("$property: $content")
    }

    // 4. The dedicated extraction function
    println("\nExample 4: Using our extractOpenGraphTags function")
    val extracted = extractOpenGraphTags(parsed)
    listOf(
        "Title" to extracted.title,
        "Image" to extracted.image,
        "Description" to extracted.description,
        "URL" to extracted.url,
        "Type" to extracted.type,
    ).forEach { (label, value) -> println("$label: $value") }
}
|
|
||||||
@@ -1,60 +1,35 @@
|
|||||||
package nl.lengrand
|
package nl.lengrand.opengraphkt
|
||||||
|
|
||||||
import org.jsoup.Jsoup
|
import nl.lengrand.opengraphkt.nl.lengrand.opengraphkt.DocumentFetcher
|
||||||
import org.jsoup.nodes.Document
|
|
||||||
|
|
||||||
data class OpenGraph(val title: String, val image: String, val description: String? = null, val url: String? = null, val type: String? = null)
|
val html = """
|
||||||
|
<!DOCTYPE html>
|
||||||
/**
|
<html>
|
||||||
* Extracts Open Graph tags from a JSoup Document
|
<head>
|
||||||
* Open Graph tags are meta tags with property attributes starting with "og:"
|
<title>Open Graph Example</title>
|
||||||
*/
|
<meta property="og:title" content="The Rock" />
|
||||||
fun extractOpenGraphTags(document: Document): OpenGraph {
|
<meta property="og:type" content="video.movie" />
|
||||||
// Select all meta tags with property attributes starting with "og:"
|
<meta property="og:url" content="https://example.com/the-rock" />
|
||||||
val ogTags = document.select("meta[property^=og:]")
|
<meta property="og:image" content="https://example.com/rock.jpg" />
|
||||||
|
<meta property="og:description" content="An action movie about a rock" />
|
||||||
// Extract the basic required Open Graph properties
|
<meta property="og:site_name" content="Example Movies" />
|
||||||
val title = ogTags.select("meta[property=og:title]").attr("content")
|
</head>
|
||||||
val image = ogTags.select("meta[property=og:image]").attr("content")
|
<body>
|
||||||
val description = ogTags.select("meta[property=og:description]").attr("content").takeIf { it.isNotEmpty() }
|
<h1>Example Page</h1>
|
||||||
val url = ogTags.select("meta[property=og:url]").attr("content").takeIf { it.isNotEmpty() }
|
</body>
|
||||||
val type = ogTags.select("meta[property=og:type]").attr("content").takeIf { it.isNotEmpty() }
|
</html>
|
||||||
|
""".trimIndent()
|
||||||
return OpenGraph(title, image, description, url, type)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Prints all Open Graph tags found in a document
|
|
||||||
*/
|
|
||||||
fun printAllOpenGraphTags(document: Document) {
|
|
||||||
val ogTags = document.select("meta[property^=og:]")
|
|
||||||
println("Found ${ogTags.size} Open Graph tags:")
|
|
||||||
|
|
||||||
ogTags.forEach { tag ->
|
|
||||||
val property = tag.attr("property")
|
|
||||||
val content = tag.attr("content")
|
|
||||||
println("$property: $content")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fun main() {
|
fun main() {
|
||||||
// Wikipedia doesn't have many Open Graph tags, so let's try a site that likely has them
|
|
||||||
val doc = Jsoup.connect("https://www.imdb.com/title/tt0068646/").get() // The Godfather movie page
|
|
||||||
println("Page title: ${doc.title()}")
|
|
||||||
|
|
||||||
// Print all Open Graph tags
|
val fetcher = DocumentFetcher()
|
||||||
printAllOpenGraphTags(doc)
|
|
||||||
|
|
||||||
// Extract Open Graph data into our data class
|
val docUrl = fetcher.fromUrl("https://www.imdb.com/title/tt0068646/")
|
||||||
try {
|
val docString = fetcher.fromString(html)
|
||||||
val ogData = extractOpenGraphTags(doc)
|
|
||||||
println("\nExtracted Open Graph data:")
|
val ogUrl = Parser().extractOpenGraphTags(docUrl)
|
||||||
println("Title: ${ogData.title}")
|
println(ogUrl)
|
||||||
println("Image: ${ogData.image}")
|
println("-------------")
|
||||||
println("Description: ${ogData.description}")
|
val ogString = Parser().extractOpenGraphTags(docString)
|
||||||
println("URL: ${ogData.url}")
|
println(ogString)
|
||||||
println("Type: ${ogData.type}")
|
|
||||||
} catch (e: Exception) {
|
|
||||||
println("Error extracting Open Graph data: ${e.message}")
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
24
src/main/kotlin/nl/lengrand/opengraphkt/DocumentFetcher.kt
Normal file
24
src/main/kotlin/nl/lengrand/opengraphkt/DocumentFetcher.kt
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
package nl.lengrand.opengraphkt.nl.lengrand.opengraphkt
|
||||||
|
|
||||||
|
import org.jsoup.Jsoup
|
||||||
|
import org.jsoup.nodes.Document
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * DocumentFetcher's job is to take the supported kinds of input and turn each one into a
 * JSoup [Document] for the Parser to then do its job.
 *
 * Every method delegates directly to the matching [Jsoup] entry point.
 */
class DocumentFetcher {

    /**
     * Returns the document obtained from `Jsoup.connect(url).get()`.
     *
     * @param url address of the page to load.
     */
    fun fromUrl(url: String): Document = Jsoup.connect(url).get()

    /**
     * Parses HTML that is already in memory.
     *
     * @param html the markup to parse.
     */
    fun fromString(html: String): Document = Jsoup.parse(html)

    /**
     * Kept so existing call sites still compile. Without an argument there is no file to
     * read, so this still throws [NotImplementedError]; use the overload that takes a file.
     */
    fun fromFile(): Document =
        TODO("fromFile() has no file to read; call fromFile(file) instead")

    /**
     * Reads and parses the HTML stored in [file].
     *
     * @param file the HTML file to load.
     * @param charsetName character set of the file; `null` is passed through to JSoup,
     *   which then determines the encoding itself (see the JSoup `parse(File, String)` docs).
     */
    fun fromFile(file: java.io.File, charsetName: String? = null): Document =
        Jsoup.parse(file, charsetName)
}
|
||||||
34
src/main/kotlin/nl/lengrand/opengraphkt/Parser.kt
Normal file
34
src/main/kotlin/nl/lengrand/opengraphkt/Parser.kt
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
package nl.lengrand.opengraphkt
|
||||||
|
|
||||||
|
import org.jsoup.nodes.Document
|
||||||
|
|
||||||
|
/**
 * Open Graph values read from a page.
 *
 * [title] and [image] must always be supplied; [description], [url] and [type]
 * are optional and default to `null`.
 */
data class OpenGraph(
    val title: String,
    val image: String,
    val description: String? = null,
    val url: String? = null,
    val type: String? = null,
)
|
||||||
|
|
||||||
|
class Parser {

    /**
     * Extracts Open Graph tags from a JSoup Document.
     * Open Graph tags are meta tags with property attributes starting with "og:".
     *
     * `title` and `image` are copied as read, so they are the empty string when the
     * `content` lookup yields nothing; `description`, `url` and `type` become `null`
     * in that case.
     *
     * @param document the parsed page to inspect.
     * @return the Open Graph values found in [document].
     */
    fun extractOpenGraphTags(document: Document): OpenGraph {
        val ogTags = document.select("meta[property^=og:]")

        // Single place for the per-property lookup that was previously repeated five times.
        fun contentOf(property: String): String =
            ogTags.select("meta[property=$property]").attr("content")

        return OpenGraph(
            title = contentOf("og:title"),
            image = contentOf("og:image"),
            description = contentOf("og:description").takeIf { it.isNotEmpty() },
            url = contentOf("og:url").takeIf { it.isNotEmpty() },
            type = contentOf("og:type").takeIf { it.isNotEmpty() },
        )
    }
}
|
||||||
Reference in New Issue
Block a user