From 0803182d88c17c4c253ea6a97ac7313fc85a2a67 Mon Sep 17 00:00:00 2001 From: Julien Lengrand-Lambert Date: Fri, 16 May 2025 23:08:28 +0200 Subject: [PATCH] Fixes #8 Starts fixing #8. Removes required dependency to JSoup for users of the library. --- README.md | 3 ++ .../kotlin/fr/lengrand/opengraphkt/Main.kt | 11 ++---- .../src/main/resources/example.html | 0 .../lengrand/opengraphkt/DocumentFetcher.kt | 29 --------------- .../lengrand/opengraphkt/OpenGraphParser.kt | 37 +++++++++++++++++++ .../opengraphkt/OpenGraphParserTest.kt | 16 +++----- 6 files changed, 49 insertions(+), 47 deletions(-) rename {opengraphkt => demo}/src/main/resources/example.html (100%) delete mode 100644 opengraphkt/src/main/kotlin/fr/lengrand/opengraphkt/DocumentFetcher.kt diff --git a/README.md b/README.md index 0230aed..ea90f46 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,8 @@ # OpenGraphKt +![Maven Central Version](https://img.shields.io/maven-central/v/fr.lengrand/opengraphkt) + + [OpenGraphKt](https://github.com/jlengrand/OpenGraphKt) is a minimalist Kotlin library to work with the [Open Graph tags](https://ogp.me/) protocol. OpenGraphKt is a tiny wrapper on top of JSoup. diff --git a/demo/src/main/kotlin/fr/lengrand/opengraphkt/Main.kt b/demo/src/main/kotlin/fr/lengrand/opengraphkt/Main.kt index bf6f773..8b37c9c 100644 --- a/demo/src/main/kotlin/fr/lengrand/opengraphkt/Main.kt +++ b/demo/src/main/kotlin/fr/lengrand/opengraphkt/Main.kt @@ -1,19 +1,18 @@ package fr.lengrand.opengraphkt import java.io.File +import java.net.URI /** * Example demonstrating how to use the OpenGraphParser to extract Open Graph data from HTML. 
*/ fun main() { val parser = OpenGraphParser() - val fetcher = DocumentFetcher() // Example 1: Parse Open Graph data from a URL println("Example 1: Parsing from URL") try { - val document = fetcher.fromUrl("https://www.imdb.com/title/tt0068646/") - val openGraphData = parser.parse(document) + val openGraphData = parser.parse(URI("https://www.imdb.com/title/tt0068646/").toURL()) println("Title: ${openGraphData.title}") println("Is valid: ${openGraphData.isValid()}") @@ -28,8 +27,7 @@ fun main() { val resourceFile = File(resourceUrl.toURI()) // Parse the file - val document = fetcher.fromFile(resourceFile) - val openGraphData = parser.parse(document) + val openGraphData = parser.parse(resourceFile) println("Title: ${openGraphData.title}") println("Is valid: ${openGraphData.isValid()}") @@ -59,8 +57,7 @@ fun main() { """.trimIndent() - val document = fetcher.fromString(html) - val openGraphData = parser.parse(document) + val openGraphData = parser.parse(html) println("Title: ${openGraphData.title}") println("Is valid: ${openGraphData.isValid()}") diff --git a/opengraphkt/src/main/resources/example.html b/demo/src/main/resources/example.html similarity index 100% rename from opengraphkt/src/main/resources/example.html rename to demo/src/main/resources/example.html diff --git a/opengraphkt/src/main/kotlin/fr/lengrand/opengraphkt/DocumentFetcher.kt b/opengraphkt/src/main/kotlin/fr/lengrand/opengraphkt/DocumentFetcher.kt deleted file mode 100644 index e7e78ec..0000000 --- a/opengraphkt/src/main/kotlin/fr/lengrand/opengraphkt/DocumentFetcher.kt +++ /dev/null @@ -1,29 +0,0 @@ -package fr.lengrand.opengraphkt - -import org.jsoup.Jsoup -import org.jsoup.nodes.Document -import java.io.File - -/** - * DocumentFetcher's job is to take any type of input and transform it into a JSoup document for the Parser to then do its job - */ -class DocumentFetcher { - - fun fromUrl(url: String): Document { - return Jsoup.connect(url).get() - } - - fun fromString(html: String): Document { - 
return Jsoup.parse(html) - } - - /** - * Parses HTML from a file and returns a JSoup Document - * @param file The file to parse - * @param charsetName The charset to use for parsing (default is UTF-8) - * @return A JSoup Document representing the parsed HTML - */ - fun fromFile(file: File, charsetName: String = "UTF-8") : Document { - return Jsoup.parse(file, charsetName) - } -} diff --git a/opengraphkt/src/main/kotlin/fr/lengrand/opengraphkt/OpenGraphParser.kt b/opengraphkt/src/main/kotlin/fr/lengrand/opengraphkt/OpenGraphParser.kt index 776d1a7..12a8409 100644 --- a/opengraphkt/src/main/kotlin/fr/lengrand/opengraphkt/OpenGraphParser.kt +++ b/opengraphkt/src/main/kotlin/fr/lengrand/opengraphkt/OpenGraphParser.kt @@ -1,7 +1,10 @@ package fr.lengrand.opengraphkt +import org.jsoup.Jsoup import org.jsoup.nodes.Document import org.jsoup.select.Elements +import java.io.File +import java.net.URL data class OpenGraphTag( val property: String, @@ -149,6 +152,40 @@ class OpenGraphParser { return buildOpenGraphData(openGraphTags) } + /** + * Extracts all Open Graph tags from a URL and returns a structured OpenGraphData object. + * + * @param url The URL to be parsed for Open Graph information. + * @return An OpenGraphData object containing all extracted Open Graph data. + */ + fun parse(url: URL) : OpenGraphData { + val doc = Jsoup.connect(url.toString()).get() + return parse(doc) + } + + /** + * Extracts all Open Graph tags from a raw HTML String and returns a structured OpenGraphData object. + * + * @param html The raw HTML String to be parsed for Open Graph information. + * @return An OpenGraphData object containing all extracted Open Graph data. + */ + fun parse(html: String) : OpenGraphData { + val doc = Jsoup.parse(html) + return parse(doc) + } + + /** + * Extracts all Open Graph tags from an HTML file and returns a structured OpenGraphData object. 
+ * + * @param file The file to parse + * @param charset The charset to use for parsing (default is UTF-8) + * @return An OpenGraphData object containing all extracted Open Graph data. + */ + fun parse(file: File, charset: String = "UTF-8") : OpenGraphData { + val doc = Jsoup.parse(file, charset) + return parse(doc) + } + /** * Extracts Open Graph tags from JSoup Elements and converts them to OpenGraphTag objects. * diff --git a/opengraphkt/src/test/kotlin/fr/lengrand/opengraphkt/OpenGraphParserTest.kt b/opengraphkt/src/test/kotlin/fr/lengrand/opengraphkt/OpenGraphParserTest.kt index c1ee33e..34ddd3b 100644 --- a/opengraphkt/src/test/kotlin/fr/lengrand/opengraphkt/OpenGraphParserTest.kt +++ b/opengraphkt/src/test/kotlin/fr/lengrand/opengraphkt/OpenGraphParserTest.kt @@ -8,7 +8,6 @@ import kotlin.test.assertTrue class OpenGraphParserTest { private val parser = OpenGraphParser() - private val fetcher = DocumentFetcher() // Sample HTML with all required OpenGraph tags and some structured properties private val completeHtml = """ @@ -139,8 +138,7 @@ class OpenGraphParserTest { @Test fun `test parse with complete OpenGraph tags`() { - val document = fetcher.fromString(completeHtml) - val openGraphData = parser.parse(document) + val openGraphData = parser.parse(completeHtml) // Verify that all required properties are extracted correctly assertEquals("The Rock", openGraphData.title) @@ -184,8 +182,7 @@ class OpenGraphParserTest { @Test fun `test parse with article-specific tags`() { - val document = fetcher.fromString(articleHtml) - val openGraphData = parser.parse(document) + val openGraphData = parser.parse(articleHtml) // Verify basic properties assertEquals("Breaking News", openGraphData.title) @@ -208,8 +205,7 @@ class OpenGraphParserTest { @Test fun `test parse with profile-specific tags`() { - val document = fetcher.fromString(profileHtml) - val openGraphData = parser.parse(document) + val openGraphData = parser.parse(profileHtml) // Verify basic properties 
assertEquals("John Doe", openGraphData.title) @@ -227,8 +223,7 @@ class OpenGraphParserTest { @Test fun `test parse with book-specific tags`() { - val document = fetcher.fromString(bookHtml) - val openGraphData = parser.parse(document) + val openGraphData = parser.parse(bookHtml) // Verify basic properties assertEquals("The Great Novel", openGraphData.title) @@ -249,8 +244,7 @@ class OpenGraphParserTest { @Test fun `test parse with multiple images`() { - val document = fetcher.fromString(multipleImagesHtml) - val openGraphData = parser.parse(document) + val openGraphData = parser.parse(multipleImagesHtml) // Verify basic properties assertEquals("Photo Gallery", openGraphData.title)