mirror of
https://github.com/jlengrand/OpenGraphKt.git
synced 2026-03-10 00:21:19 +00:00
Cleans up and structures code
This commit is contained in:
21
LICENSE
Normal file
21
LICENSE
Normal file
@@ -0,0 +1,21 @@
|
||||
The MIT License
|
||||
|
||||
Copyright (c) 2009-2025 Jonathan Hedley <https://jsoup.org/>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
91
README.md
91
README.md
@@ -1,85 +1,16 @@
|
||||
# OpenGraphKt
|
||||
|
||||
A simple Kotlin project demonstrating how to extract Open Graph tags from webpages using JSoup.
|
||||
|
||||
## What is Open Graph?
|
||||
|
||||
Open Graph is a protocol that enables any web page to become a rich object in a social graph. It was originally created by Facebook and is now widely used by many social media platforms and websites.
|
||||
|
||||
Open Graph tags are meta tags with property attributes that start with "og:". They are used to define properties like title, image, description, etc.
|
||||
|
||||
## How to Extract Open Graph Tags with JSoup
|
||||
|
||||
This project demonstrates several ways to extract Open Graph tags from HTML using JSoup:
|
||||
|
||||
### 1. Select all Open Graph tags
|
||||
|
||||
```kotlin
|
||||
val allOgTags = document.select("meta[property^=og:]")
|
||||
allOgTags.forEach { tag ->
|
||||
println("${tag.attr("property")}: ${tag.attr("content")}")
|
||||
}
|
||||
```
|
||||
|
||||
The CSS selector `meta[property^=og:]` selects all meta tags with a property attribute that starts with "og:".
|
||||
|
||||
### 2. Select a specific Open Graph tag
|
||||
|
||||
```kotlin
|
||||
val ogTitle = document.select("meta[property=og:title]").attr("content")
|
||||
println("og:title: $ogTitle")
|
||||
```
|
||||
|
||||
### 3. Extract all Open Graph data into a map
|
||||
|
||||
```kotlin
|
||||
val ogData = document.select("meta[property^=og:]")
|
||||
.associate { it.attr("property") to it.attr("content") }
|
||||
|
||||
ogData.forEach { (property, content) ->
|
||||
println("$property: $content")
|
||||
}
|
||||
```
|
||||
|
||||
### 4. Using a dedicated function
|
||||
|
||||
```kotlin
|
||||
fun extractOpenGraphTags(document: Document): OpenGraph {
|
||||
// Select all meta tags with property attributes starting with "og:"
|
||||
val ogTags = document.select("meta[property^=og:]")
|
||||
|
||||
// Extract the basic required Open Graph properties
|
||||
val title = ogTags.select("meta[property=og:title]").attr("content")
|
||||
val image = ogTags.select("meta[property=og:image]").attr("content")
|
||||
val description = ogTags.select("meta[property=og:description]").attr("content").takeIf { it.isNotEmpty() }
|
||||
val url = ogTags.select("meta[property=og:url]").attr("content").takeIf { it.isNotEmpty() }
|
||||
val type = ogTags.select("meta[property=og:type]").attr("content").takeIf { it.isNotEmpty() }
|
||||
|
||||
return OpenGraph(title, image, description, url, type)
|
||||
}
|
||||
```
|
||||
|
||||
## Examples
|
||||
|
||||
The project includes two examples:
|
||||
|
||||
1. `Main.kt`: Connects to a real website (IMDB) and extracts Open Graph tags
|
||||
2. `Example.kt`: Uses a local HTML string with Open Graph tags for demonstration
|
||||
|
||||
## Running the Examples
|
||||
|
||||
To run the Example.kt file:
|
||||
|
||||
```bash
|
||||
./gradlew run
|
||||
```
|
||||
|
||||
To run the Main.kt file, update the `mainClass` in `build.gradle.kts` to "nl.lengrand.MainKt" and run:
|
||||
|
||||
```bash
|
||||
./gradlew run
|
||||
```
|
||||
[OpenGraphKt](https://github.com/jlengrand/OpenGraphKt) is a minimalist Kotlin multiplatform library that extracts [Open Graph tags](https://ogp.me/) from HTML pages.
|
||||
The input HTML can be an inline string, a file, or a remote URL. OpenGraphKt is a tiny wrapper on top of JSoup.
|
||||
|
||||
## Dependencies
|
||||
|
||||
- JSoup 1.20.1: A Java library for working with HTML
|
||||
- [JSoup](https://jsoup.org/)
|
||||
|
||||
## Author
|
||||
|
||||
* [Julien Lengrand-Lambert](https://github.com/jlengrand)
|
||||
|
||||
## License
|
||||
|
||||
* [See License](./LICENSE)
|
||||
@@ -1,58 +0,0 @@
|
||||
package nl.lengrand
|
||||
|
||||
import org.jsoup.Jsoup
|
||||
import org.jsoup.nodes.Document
|
||||
|
||||
/**
 * Walks through four ways of reading Open Graph tags from an in-memory HTML page
 * and prints the outcome of each approach to standard output.
 */
fun main() {
    // Sample page carrying a handful of Open Graph <meta> tags.
    val samplePage = """
        <!DOCTYPE html>
        <html>
        <head>
        <title>Open Graph Example</title>
        <meta property="og:title" content="The Rock" />
        <meta property="og:type" content="video.movie" />
        <meta property="og:url" content="https://example.com/the-rock" />
        <meta property="og:image" content="https://example.com/rock.jpg" />
        <meta property="og:description" content="An action movie about a rock" />
        <meta property="og:site_name" content="Example Movies" />
        </head>
        <body>
        <h1>Example Page</h1>
        </body>
        </html>
    """.trimIndent()

    // Turn the HTML string into a JSoup Document.
    val doc = Jsoup.parse(samplePage)

    // 1. Every <meta> tag whose property starts with "og:".
    println("Example 1: Select all Open Graph tags")
    for (tag in doc.select("meta[property^=og:]")) {
        val property = tag.attr("property")
        val content = tag.attr("content")
        println("$property: $content")
    }

    // 2. One specific tag, addressed by its exact property name.
    println("\nExample 2: Select a specific Open Graph tag")
    val titleContent = doc.select("meta[property=og:title]").attr("content")
    println("og:title: $titleContent")

    // 3. All tags collected into a property -> content map.
    println("\nExample 3: Extract all Open Graph data into a map")
    val contentByProperty = doc.select("meta[property^=og:]")
        .associate { it.attr("property") to it.attr("content") }
    for ((property, content) in contentByProperty) {
        println("$property: $content")
    }

    // 4. The dedicated helper that maps the tags onto the OpenGraph data class.
    println("\nExample 4: Using our extractOpenGraphTags function")
    with(extractOpenGraphTags(doc)) {
        println("Title: $title")
        println("Image: $image")
        println("Description: $description")
        println("URL: $url")
        println("Type: $type")
    }
}
|
||||
@@ -1,60 +1,35 @@
|
||||
package nl.lengrand
|
||||
package nl.lengrand.opengraphkt
|
||||
|
||||
import org.jsoup.Jsoup
|
||||
import org.jsoup.nodes.Document
|
||||
import nl.lengrand.opengraphkt.nl.lengrand.opengraphkt.DocumentFetcher
|
||||
|
||||
/**
 * Open Graph values read from a page.
 *
 * [title] and [image] must always be supplied; [description], [url] and [type]
 * are optional and default to `null`.
 */
data class OpenGraph(
    val title: String,
    val image: String,
    val description: String? = null,
    val url: String? = null,
    val type: String? = null,
)
|
||||
|
||||
/**
 * Builds an [OpenGraph] from the Open Graph `<meta>` tags of a JSoup [Document].
 *
 * Only `<meta>` tags whose `property` attribute starts with "og:" are looked at.
 *
 * @param document the parsed page to read the tags from
 * @return the extracted values; `title` and `image` are passed through as returned by
 * JSoup, while `description`, `url` and `type` become `null` when their content is empty
 */
fun extractOpenGraphTags(document: Document): OpenGraph {
    val ogTags = document.select("meta[property^=og:]")

    // Content attribute of the tag(s) matched by the given selector.
    fun contentOf(selector: String): String = ogTags.select(selector).attr("content")

    // Same lookup, but an empty content string is reported as null.
    fun optionalContentOf(selector: String): String? =
        contentOf(selector).takeIf { it.isNotEmpty() }

    return OpenGraph(
        title = contentOf("meta[property=og:title]"),
        image = contentOf("meta[property=og:image]"),
        description = optionalContentOf("meta[property=og:description]"),
        url = optionalContentOf("meta[property=og:url]"),
        type = optionalContentOf("meta[property=og:type]"),
    )
}
|
||||
|
||||
/**
 * Writes the Open Graph tags of [document] to standard output: first a header line with
 * the number of matching tags, then one "property: content" line per tag.
 */
fun printAllOpenGraphTags(document: Document) {
    val matches = document.select("meta[property^=og:]")
    println("Found ${matches.size} Open Graph tags:")

    for (meta in matches) {
        println("${meta.attr("property")}: ${meta.attr("content")}")
    }
}
|
||||
/** Sample HTML page with a few Open Graph `<meta>` tags, used as inline input by [main]. */
val html: String = """
    <!DOCTYPE html>
    <html>
    <head>
    <title>Open Graph Example</title>
    <meta property="og:title" content="The Rock" />
    <meta property="og:type" content="video.movie" />
    <meta property="og:url" content="https://example.com/the-rock" />
    <meta property="og:image" content="https://example.com/rock.jpg" />
    <meta property="og:description" content="An action movie about a rock" />
    <meta property="og:site_name" content="Example Movies" />
    </head>
    <body>
    <h1>Example Page</h1>
    </body>
    </html>
""".trimIndent()
|
||||
|
||||
/**
 * Demo entry point. Downloads the IMDb page for The Godfather, prints its title and its
 * Open Graph tags, then runs the extraction again through [DocumentFetcher] and [Parser]
 * for both the remote page and the inline [html] sample.
 *
 * The remote page is requested twice: once directly through JSoup and once through the fetcher.
 */
fun main() {
    // Wikipedia doesn't expose many Open Graph tags, so a site that likely has them is used.
    val godfatherPage = Jsoup.connect("https://www.imdb.com/title/tt0068646/").get() // The Godfather movie page
    println("Page title: ${godfatherPage.title()}")

    // Dump every og: tag found on the page.
    printAllOpenGraphTags(godfatherPage)
    val fetcher = DocumentFetcher()

    // Map the tags onto the OpenGraph data class; a failure is reported, not rethrown.
    try {
        val extracted = extractOpenGraphTags(godfatherPage)
        println("\nExtracted Open Graph data:")
        with(extracted) {
            println("Title: $title")
            println("Image: $image")
            println("Description: $description")
            println("URL: $url")
            println("Type: $type")
        }
    } catch (e: Exception) {
        println("Error extracting Open Graph data: ${e.message}")
    }

    // Same extraction, this time going through the fetcher/parser pair.
    val remoteDocument = fetcher.fromUrl("https://www.imdb.com/title/tt0068646/")
    val inlineDocument = fetcher.fromString(html)

    println(Parser().extractOpenGraphTags(remoteDocument))
    println("-------------")
    println(Parser().extractOpenGraphTags(inlineDocument))
}
|
||||
|
||||
24
src/main/kotlin/nl/lengrand/opengraphkt/DocumentFetcher.kt
Normal file
24
src/main/kotlin/nl/lengrand/opengraphkt/DocumentFetcher.kt
Normal file
@@ -0,0 +1,24 @@
|
||||
package nl.lengrand.opengraphkt.nl.lengrand.opengraphkt
|
||||
|
||||
import org.jsoup.Jsoup
|
||||
import org.jsoup.nodes.Document
|
||||
|
||||
|
||||
/**
 * Takes any supported type of input and transforms it into a JSoup [Document],
 * so that the Parser can then do its job.
 */
class DocumentFetcher {

    /** Connects to [url] through JSoup and returns the fetched page as a [Document]. */
    fun fromUrl(url: String): Document = Jsoup.connect(url).get()

    /** Parses the HTML markup given in [html] into a [Document]. */
    fun fromString(html: String): Document = Jsoup.parse(html)

    /** Placeholder for file input: not implemented yet, so every call fails via [TODO]. */
    fun fromFile(): Document = TODO()
}
|
||||
34
src/main/kotlin/nl/lengrand/opengraphkt/Parser.kt
Normal file
34
src/main/kotlin/nl/lengrand/opengraphkt/Parser.kt
Normal file
@@ -0,0 +1,34 @@
|
||||
package nl.lengrand.opengraphkt
|
||||
|
||||
import org.jsoup.nodes.Document
|
||||
|
||||
/**
 * Open Graph values extracted from a page.
 *
 * @property title content of the `og:title` tag
 * @property image content of the `og:image` tag
 * @property description content of the `og:description` tag; defaults to `null`
 * @property url content of the `og:url` tag; defaults to `null`
 * @property type content of the `og:type` tag; defaults to `null`
 */
data class OpenGraph(
    val title: String,
    val image: String,
    val description: String? = null,
    val url: String? = null,
    val type: String? = null,
)
|
||||
|
||||
/**
 * Reads Open Graph data out of an already parsed JSoup [Document].
 */
class Parser {

    /**
     * Extracts Open Graph tags from a JSoup [Document].
     *
     * Open Graph tags are `<meta>` tags whose `property` attribute starts with "og:".
     * Unlike earlier revisions, this function no longer dumps the matched tags to
     * standard output; it only returns the extracted values.
     *
     * @param document the parsed page to read the tags from
     * @return the extracted values; `title` and `image` are passed through as returned by
     * JSoup, while `description`, `url` and `type` are `null` when their content is empty
     */
    fun extractOpenGraphTags(document: Document): OpenGraph {
        val ogTags = document.select("meta[property^=og:]")

        // Content attribute of the tag carrying the given "og:" property.
        fun contentOf(property: String): String =
            ogTags.select("meta[property=og:$property]").attr("content")

        // Same lookup, but an empty content string is reported as null.
        fun optionalContentOf(property: String): String? =
            contentOf(property).takeIf { it.isNotEmpty() }

        return OpenGraph(
            title = contentOf("title"),
            image = contentOf("image"),
            description = optionalContentOf("description"),
            url = optionalContentOf("url"),
            type = optionalContentOf("type"),
        )
    }
}
|
||||
Reference in New Issue
Block a user