From eab730ccede15bfc9e64d69b59c76967fa1ddb6d Mon Sep 17 00:00:00 2001 From: Julien Lengrand-Lambert Date: Thu, 1 May 2025 11:19:16 +0200 Subject: [PATCH] Cleans up and structures code --- LICENSE | 21 +++++ README.md | 91 +++---------------- src/main/kotlin/Example.kt | 58 ------------ src/main/kotlin/Main.kt | 81 ++++++----------- .../lengrand/opengraphkt/DocumentFetcher.kt | 24 +++++ .../kotlin/nl/lengrand/opengraphkt/Parser.kt | 34 +++++++ 6 files changed, 118 insertions(+), 191 deletions(-) create mode 100644 LICENSE delete mode 100644 src/main/kotlin/Example.kt create mode 100644 src/main/kotlin/nl/lengrand/opengraphkt/DocumentFetcher.kt create mode 100644 src/main/kotlin/nl/lengrand/opengraphkt/Parser.kt diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..edb2563 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +The MIT License + +Copyright (c) 2009-2025 Jonathan Hedley + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md index 7a9106d..b4e3cba 100644 --- a/README.md +++ b/README.md @@ -1,85 +1,16 @@ # OpenGraphKt -A simple Kotlin project demonstrating how to extract Open Graph tags from webpages using JSoup. - -## What is Open Graph? - -Open Graph is a protocol that enables any web page to become a rich object in a social graph. It was originally created by Facebook and is now widely used by many social media platforms and websites. - -Open Graph tags are meta tags with property attributes that start with "og:". They are used to define properties like title, image, description, etc. - -## How to Extract Open Graph Tags with JSoup - -This project demonstrates several ways to extract Open Graph tags from HTML using JSoup: - -### 1. Select all Open Graph tags - -```kotlin -val allOgTags = document.select("meta[property^=og:]") -allOgTags.forEach { tag -> - println("${tag.attr("property")}: ${tag.attr("content")}") -} -``` - -The CSS selector `meta[property^=og:]` selects all meta tags with a property attribute that starts with "og:". - -### 2. Select a specific Open Graph tag - -```kotlin -val ogTitle = document.select("meta[property=og:title]").attr("content") -println("og:title: $ogTitle") -``` - -### 3. Extract all Open Graph data into a map - -```kotlin -val ogData = document.select("meta[property^=og:]") - .associate { it.attr("property") to it.attr("content") } - -ogData.forEach { (property, content) -> - println("$property: $content") -} -``` - -### 4. Using a dedicated function - -```kotlin -fun extractOpenGraphTags(document: Document): OpenGraph { - // Select all meta tags with property attributes starting with "og:" - val ogTags = document.select("meta[property^=og:]") - - // Extract the basic required Open Graph properties - val title = ogTags.select("meta[property=og:title]").attr("content") - val image = ogTags.select("meta[property=og:image]").attr("content") - val description = ogTags.select("meta[property=og:description]").attr("content").takeIf { it.isNotEmpty() } - val url = ogTags.select("meta[property=og:url]").attr("content").takeIf { it.isNotEmpty() } - val type = ogTags.select("meta[property=og:type]").attr("content").takeIf { it.isNotEmpty() } - - return OpenGraph(title, image, description, url, type) -} -``` - -## Examples - -The project includes two examples: - -1. `Main.kt`: Connects to a real website (IMDB) and extracts Open Graph tags -2. `Example.kt`: Uses a local HTML string with Open Graph tags for demonstration - -## Running the Examples - -To run the Example.kt file: - -```bash -./gradlew run -``` - -To run the Main.kt file, update the `mainClass` in `build.gradle.kts` to "nl.lengrand.MainKt" and run: - -```bash -./gradlew run -``` +[OpenGraphKt](https://github.com/jlengrand/OpenGraphKt) is a minimalist Kotlin multiplatform library that extracts [Open Graph tags](https://ogp.me/) from HTML pages. +The input HTML can be an inlined string, a file, or a remote URL. OpenGraphKt is a tiny wrapper on top of JSoup. ## Dependencies -- JSoup 1.20.1: A Java library for working with HTML \ No newline at end of file +- [JSoup](https://jsoup.org/) + +## Author + +* [Julien Lengrand-Lambert](https://github.com/jlengrand) + +## License + +* [See License](./LICENSE) \ No newline at end of file diff --git a/src/main/kotlin/Example.kt b/src/main/kotlin/Example.kt deleted file mode 100644 index 7949783..0000000 --- a/src/main/kotlin/Example.kt +++ /dev/null @@ -1,58 +0,0 @@ -package nl.lengrand - -import org.jsoup.Jsoup -import org.jsoup.nodes.Document - -fun main() { - // Example HTML with Open Graph tags - val html = """ - - - - Open Graph Example - - - - - - - - -

Example Page

- - - """.trimIndent() - - // Parse the HTML string into a Document - val doc = Jsoup.parse(html) - - // Demonstrate how to select all Open Graph tags - println("Example 1: Select all Open Graph tags") - val allOgTags = doc.select("meta[property^=og:]") - allOgTags.forEach { tag -> - println("${tag.attr("property")}: ${tag.attr("content")}") - } - - // Demonstrate how to select a specific Open Graph tag - println("\nExample 2: Select a specific Open Graph tag") - val ogTitle = doc.select("meta[property=og:title]").attr("content") - println("og:title: $ogTitle") - - // Demonstrate how to extract all Open Graph data into a map - println("\nExample 3: Extract all Open Graph data into a map") - val ogData = doc.select("meta[property^=og:]") - .associate { it.attr("property") to it.attr("content") } - - ogData.forEach { (property, content) -> - println("$property: $content") - } - - // Demonstrate using our extractOpenGraphTags function - println("\nExample 4: Using our extractOpenGraphTags function") - val openGraph = extractOpenGraphTags(doc) - println("Title: ${openGraph.title}") - println("Image: ${openGraph.image}") - println("Description: ${openGraph.description}") - println("URL: ${openGraph.url}") - println("Type: ${openGraph.type}") -} \ No newline at end of file diff --git a/src/main/kotlin/Main.kt b/src/main/kotlin/Main.kt index 1cdf20a..4d2f71b 100644 --- a/src/main/kotlin/Main.kt +++ b/src/main/kotlin/Main.kt @@ -1,60 +1,35 @@ -package nl.lengrand +package nl.lengrand.opengraphkt -import org.jsoup.Jsoup -import org.jsoup.nodes.Document +import nl.lengrand.opengraphkt.nl.lengrand.opengraphkt.DocumentFetcher -data class OpenGraph(val title: String, val image: String, val description: String? = null, val url: String? = null, val type: String? = null) - -/** - * Extracts Open Graph tags from a JSoup Document - * Open Graph tags are meta tags with property attributes starting with "og:" - */ -fun extractOpenGraphTags(document: Document): OpenGraph { - // Select all meta tags with property attributes starting with "og:" - val ogTags = document.select("meta[property^=og:]") - - // Extract the basic required Open Graph properties - val title = ogTags.select("meta[property=og:title]").attr("content") - val image = ogTags.select("meta[property=og:image]").attr("content") - val description = ogTags.select("meta[property=og:description]").attr("content").takeIf { it.isNotEmpty() } - val url = ogTags.select("meta[property=og:url]").attr("content").takeIf { it.isNotEmpty() } - val type = ogTags.select("meta[property=og:type]").attr("content").takeIf { it.isNotEmpty() } - - return OpenGraph(title, image, description, url, type) -} - -/** - * Prints all Open Graph tags found in a document - */ -fun printAllOpenGraphTags(document: Document) { - val ogTags = document.select("meta[property^=og:]") - println("Found ${ogTags.size} Open Graph tags:") - - ogTags.forEach { tag -> - val property = tag.attr("property") - val content = tag.attr("content") - println("$property: $content") - } -} +val html = """ + + + + Open Graph Example + + + + + + + + +

Example Page

+ + + """.trimIndent() fun main() { - // Wikipedia doesn't have many Open Graph tags, so let's try a site that likely has them - val doc = Jsoup.connect("https://www.imdb.com/title/tt0068646/").get() // The Godfather movie page - println("Page title: ${doc.title()}") - // Print all Open Graph tags - printAllOpenGraphTags(doc) + val fetcher = DocumentFetcher() - // Extract Open Graph data into our data class - try { - val ogData = extractOpenGraphTags(doc) - println("\nExtracted Open Graph data:") - println("Title: ${ogData.title}") - println("Image: ${ogData.image}") - println("Description: ${ogData.description}") - println("URL: ${ogData.url}") - println("Type: ${ogData.type}") - } catch (e: Exception) { - println("Error extracting Open Graph data: ${e.message}") - } + val docUrl = fetcher.fromUrl("https://www.imdb.com/title/tt0068646/") + val docString = fetcher.fromString(html) + + val ogUrl = Parser().extractOpenGraphTags(docUrl) + println(ogUrl) + println("-------------") + val ogString = Parser().extractOpenGraphTags(docString) + println(ogString) } diff --git a/src/main/kotlin/nl/lengrand/opengraphkt/DocumentFetcher.kt b/src/main/kotlin/nl/lengrand/opengraphkt/DocumentFetcher.kt new file mode 100644 index 0000000..c4c0650 --- /dev/null +++ b/src/main/kotlin/nl/lengrand/opengraphkt/DocumentFetcher.kt @@ -0,0 +1,24 @@ +package nl.lengrand.opengraphkt.nl.lengrand.opengraphkt + +import org.jsoup.Jsoup +import org.jsoup.nodes.Document + + +/* +DocumentFetcher's job is to take any type of input and transform it into a JSoup document for the Parser to then do its job + */ +class DocumentFetcher { + + fun fromUrl(url: String): Document { + return Jsoup.connect(url).get() + } + + fun fromString(html: String): Document { + return Jsoup.parse(html) + } + + fun fromFile() : Document { + TODO() + } + +} \ No newline at end of file diff --git a/src/main/kotlin/nl/lengrand/opengraphkt/Parser.kt b/src/main/kotlin/nl/lengrand/opengraphkt/Parser.kt new file mode 100644 index 0000000..c4cbce7 --- /dev/null +++ b/src/main/kotlin/nl/lengrand/opengraphkt/Parser.kt @@ -0,0 +1,34 @@ +package nl.lengrand.opengraphkt + +import org.jsoup.nodes.Document + +data class OpenGraph( + val title: String, + val image: String, + val description: String? = null, + val url: String? = null, + val type: String? = null +) + +class Parser { + + /** + * Extracts Open Graph tags from a JSoup Document + * Open Graph tags are meta tags with property attributes starting with "og:" + */ + fun extractOpenGraphTags(document: Document): OpenGraph { + val ogTags = document.select("meta[property^=og:]") + + println(ogTags) + + // Extract the basic required Open Graph properties + val title = ogTags.select("meta[property=og:title]").attr("content") + val image = ogTags.select("meta[property=og:image]").attr("content") + val description = ogTags.select("meta[property=og:description]").attr("content").takeIf { it.isNotEmpty() } + val url = ogTags.select("meta[property=og:url]").attr("content").takeIf { it.isNotEmpty() } + val type = ogTags.select("meta[property=og:type]").attr("content").takeIf { it.isNotEmpty() } + + return OpenGraph(title, image, description, url, type) + } + +} \ No newline at end of file