diff --git a/src/main/kotlin/nl/lengrand/opengraphkt/OpenGraphParser.kt b/src/main/kotlin/nl/lengrand/opengraphkt/OpenGraphParser.kt
new file mode 100644
index 0000000..798d0b7
--- /dev/null
+++ b/src/main/kotlin/nl/lengrand/opengraphkt/OpenGraphParser.kt
@@ -0,0 +1,600 @@
+package nl.lengrand.opengraphkt
+
+import org.jsoup.nodes.Document
+import org.jsoup.select.Elements
+
+/**
+ * A comprehensive parser for Open Graph protocol tags.
+ *
+ * The Open Graph protocol enables any web page to become a rich object in a social graph.
+ * This parser extracts all Open Graph tags from an HTML document and organizes them into
+ * a structured format according to the Open Graph protocol specification.
+ *
+ * @see <a href="https://ogp.me/">Open Graph Protocol</a>
+ */
+class OpenGraphParser {
+
+    /**
+     * Parses the Open Graph metadata of [document] into an [OpenGraphData].
+     *
+     * Only `<meta>` elements whose `property` attribute starts with `og:` are selected;
+     * the rest of the document is not inspected.
+     *
+     * @param document the parsed HTML page to inspect
+     * @return the structured view of the selected meta tags
+     */
+    fun parse(document: Document): OpenGraphData =
+        document.select("meta[property^=og:]")
+            .let { metaElements -> extractOpenGraphTags(metaElements) }
+            .let { openGraphTags -> buildOpenGraphData(openGraphTags) }
+
+    /**
+     * Converts the selected `<meta>` elements into [OpenGraphTag] objects.
+     *
+     * The first three characters of each `property` value (the `og:` prefix) are dropped,
+     * so `og:image:width` becomes `image:width`. The `content` attribute is copied as read.
+     *
+     * @param elements the meta elements to convert; each `property` must start with `og:`
+     * @return one [OpenGraphTag] per element, in the order of [elements]
+     */
+    private fun extractOpenGraphTags(elements: Elements): List<OpenGraphTag> =
+        elements.map { meta ->
+            // The only caller passes elements matched by "meta[property^=og:]",
+            // so the prefix is always there to strip.
+            val property = meta.attr("property").substring("og:".length)
+            OpenGraphTag(property, meta.attr("content"))
+        }
+
+    /**
+     * Assembles an [OpenGraphData] from the flat list of parsed tags.
+     *
+     * Scalar properties (title, type, url, ...) take the content of the first matching tag,
+     * while every `locale:alternate` tag is kept. Image, video and audio tags are turned into
+     * structured objects. The article, profile and book sections are only built when the
+     * `type` tag is exactly "article", "profile" or "book"; otherwise they are null.
+     *
+     * @param tags the parsed tags; property names are matched without an `og:` prefix
+     * @return the structured Open Graph data, with [tags] passed through as `rawTags`
+     */
+    private fun buildOpenGraphData(tags: List<OpenGraphTag>): OpenGraphData {
+        // Namespace = text before the first colon; a property without a colon is its own
+        // namespace ("image:width" -> "image", "title" -> "title").
+        val tagsByNamespace = tags.groupBy { it.property.substringBefore(":") }
+        val type = getFirstTagContent(tags, "type")
+
+        return OpenGraphData(
+            rawTags = tags,
+            // Basic properties: the first occurrence wins.
+            title = getFirstTagContent(tags, "title"),
+            type = type,
+            url = getFirstTagContent(tags, "url"),
+            description = getFirstTagContent(tags, "description"),
+            siteName = getFirstTagContent(tags, "site_name"),
+            determiner = getFirstTagContent(tags, "determiner"),
+            locale = getFirstTagContent(tags, "locale"),
+            localeAlternate = getTagsContent(tags, "locale:alternate"),
+            // Structured properties: one object per base tag of the namespace.
+            images = buildImages(tagsByNamespace.getOrDefault("image", emptyList())),
+            videos = buildVideos(tagsByNamespace.getOrDefault("video", emptyList())),
+            audios = buildAudios(tagsByNamespace.getOrDefault("audio", emptyList())),
+            // Type-specific sections are keyed on an exact match of the type value.
+            article = if (type == "article") buildArticle(tagsByNamespace) else null,
+            profile = if (type == "profile") buildProfile(tagsByNamespace) else null,
+            book = if (type == "book") buildBook(tagsByNamespace) else null
+        )
+    }
+
+    /**
+     * Returns the content of the first tag whose property equals [property].
+     *
+     * The comparison is an exact, case-sensitive string match.
+     *
+     * @param tags the tags to search, scanned from the start of the list
+     * @param property the property name to look for, e.g. "site_name"
+     * @return the content of the first matching tag, or null when no tag matches
+     */
+    private fun getFirstTagContent(tags: List<OpenGraphTag>, property: String): String? =
+        tags.firstOrNull { it.property == property }?.content
+
+    /**
+     * Returns the content of every tag whose property equals [property].
+     *
+     * @param tags the tags to search
+     * @param property the property name to look for, e.g. "locale:alternate"
+     * @return the matching contents in the order of [tags]; empty when no tag matches
+     */
+    private fun getTagsContent(tags: List<OpenGraphTag>, property: String): List<String> =
+        tags.filter { it.property == property }.map { it.content }
+
+    /**
+     * Builds one [OpenGraphImage] per base image tag.
+     *
+     * A base tag is a tag whose property is `image` or `image:url`. The tags that follow a
+     * base tag, up to the next base tag, are read as that image's attributes; for each
+     * attribute the first occurrence in that range is used. Attribute tags placed before
+     * the first base tag are ignored.
+     *
+     * Base tags are located by position rather than with `indexOf`, so two base tags that
+     * compare equal (same property and content) still get their own attribute range.
+     *
+     * @param imageTags the tags of the `image` namespace, in their original order
+     * @return the images in the order of their base tags; empty when there is no base tag
+     */
+    private fun buildImages(imageTags: List<OpenGraphTag>): List<OpenGraphImage> {
+        val baseIndices = imageTags.indices.filter { index ->
+            imageTags[index].property == "image" || imageTags[index].property == "image:url"
+        }
+
+        return baseIndices.mapIndexed { position, baseIndex ->
+            // Attributes run from just after this base tag to the next base tag (or the end).
+            val endIndex = baseIndices.getOrElse(position + 1) { imageTags.size }
+            val attributeTags = imageTags.subList(baseIndex + 1, endIndex)
+
+            fun attribute(name: String): String? =
+                attributeTags.firstOrNull { it.property == "image:$name" }?.content
+
+            OpenGraphImage(
+                url = imageTags[baseIndex].content,
+                secureUrl = attribute("secure_url"),
+                type = attribute("type"),
+                // Values that are not valid integers become null instead of failing the parse.
+                width = attribute("width")?.toIntOrNull(),
+                height = attribute("height")?.toIntOrNull(),
+                alt = attribute("alt")
+            )
+        }
+    }
+
+    /**
+     * Builds one [OpenGraphVideo] per base video tag.
+     *
+     * A base tag is a tag whose property is `video` or `video:url`. The tags that follow a
+     * base tag, up to the next base tag, are read as that video's attributes; for each
+     * attribute the first occurrence in that range is used. Attribute tags placed before
+     * the first base tag are ignored.
+     *
+     * Base tags are located by position rather than with `indexOf`, so two base tags that
+     * compare equal (same property and content) still get their own attribute range.
+     *
+     * @param videoTags the tags of the `video` namespace, in their original order
+     * @return the videos in the order of their base tags; empty when there is no base tag
+     */
+    private fun buildVideos(videoTags: List<OpenGraphTag>): List<OpenGraphVideo> {
+        val baseIndices = videoTags.indices.filter { index ->
+            videoTags[index].property == "video" || videoTags[index].property == "video:url"
+        }
+
+        return baseIndices.mapIndexed { position, baseIndex ->
+            // Attributes run from just after this base tag to the next base tag (or the end).
+            val endIndex = baseIndices.getOrElse(position + 1) { videoTags.size }
+            val attributeTags = videoTags.subList(baseIndex + 1, endIndex)
+
+            fun attribute(name: String): String? =
+                attributeTags.firstOrNull { it.property == "video:$name" }?.content
+
+            OpenGraphVideo(
+                url = videoTags[baseIndex].content,
+                secureUrl = attribute("secure_url"),
+                type = attribute("type"),
+                // Values that are not valid integers become null instead of failing the parse.
+                width = attribute("width")?.toIntOrNull(),
+                height = attribute("height")?.toIntOrNull(),
+                duration = attribute("duration")?.toIntOrNull()
+            )
+        }
+    }
+
+    /**
+     * Builds one [OpenGraphAudio] per base audio tag.
+     *
+     * A base tag is a tag whose property is `audio` or `audio:url`. The tags that follow a
+     * base tag, up to the next base tag, are read as that audio's attributes; for each
+     * attribute the first occurrence in that range is used. Attribute tags placed before
+     * the first base tag are ignored.
+     *
+     * Base tags are located by position rather than with `indexOf`, so two base tags that
+     * compare equal (same property and content) still get their own attribute range.
+     *
+     * @param audioTags the tags of the `audio` namespace, in their original order
+     * @return the audios in the order of their base tags; empty when there is no base tag
+     */
+    private fun buildAudios(audioTags: List<OpenGraphTag>): List<OpenGraphAudio> {
+        val baseIndices = audioTags.indices.filter { index ->
+            audioTags[index].property == "audio" || audioTags[index].property == "audio:url"
+        }
+
+        return baseIndices.mapIndexed { position, baseIndex ->
+            // Attributes run from just after this base tag to the next base tag (or the end).
+            val endIndex = baseIndices.getOrElse(position + 1) { audioTags.size }
+            val attributeTags = audioTags.subList(baseIndex + 1, endIndex)
+
+            fun attribute(name: String): String? =
+                attributeTags.firstOrNull { it.property == "audio:$name" }?.content
+
+            OpenGraphAudio(
+                url = audioTags[baseIndex].content,
+                secureUrl = attribute("secure_url"),
+                type = attribute("type")
+            )
+        }
+    }
+
+    /**
+     * Builds the article section from the tags of the `article` namespace.
+     *
+     * Single-valued fields use the first matching tag; `article:author` and `article:tag`
+     * keep every occurrence.
+     *
+     * @param groupedTags all tags, keyed by namespace (the text before the first colon)
+     * @return the article metadata, or null when there is no tag in the `article` namespace
+     */
+    private fun buildArticle(groupedTags: Map<String, List<OpenGraphTag>>): OpenGraphArticle? {
+        val articleTags = groupedTags["article"].orEmpty()
+        if (articleTags.isEmpty()) return null
+
+        return OpenGraphArticle(
+            publishedTime = getFirstTagContent(articleTags, "article:published_time"),
+            modifiedTime = getFirstTagContent(articleTags, "article:modified_time"),
+            expirationTime = getFirstTagContent(articleTags, "article:expiration_time"),
+            section = getFirstTagContent(articleTags, "article:section"),
+            authors = getTagsContent(articleTags, "article:author"),
+            tags = getTagsContent(articleTags, "article:tag")
+        )
+    }
+
+    /**
+     * Builds the profile section from the tags of the `profile` namespace.
+     *
+     * Every field uses the content of the first matching tag.
+     *
+     * @param groupedTags all tags, keyed by namespace (the text before the first colon)
+     * @return the profile metadata, or null when there is no tag in the `profile` namespace
+     */
+    private fun buildProfile(groupedTags: Map<String, List<OpenGraphTag>>): OpenGraphProfile? {
+        val profileTags = groupedTags["profile"].orEmpty()
+        if (profileTags.isEmpty()) return null
+
+        return OpenGraphProfile(
+            firstName = getFirstTagContent(profileTags, "profile:first_name"),
+            lastName = getFirstTagContent(profileTags, "profile:last_name"),
+            username = getFirstTagContent(profileTags, "profile:username"),
+            gender = getFirstTagContent(profileTags, "profile:gender")
+        )
+    }
+
+    /**
+     * Builds the book section from the tags of the `book` namespace.
+     *
+     * `book:isbn` and `book:release_date` use the first matching tag; `book:author` and
+     * `book:tag` keep every occurrence.
+     *
+     * @param groupedTags all tags, keyed by namespace (the text before the first colon)
+     * @return the book metadata, or null when there is no tag in the `book` namespace
+     */
+    private fun buildBook(groupedTags: Map<String, List<OpenGraphTag>>): OpenGraphBook? {
+        val bookTags = groupedTags["book"].orEmpty()
+        if (bookTags.isEmpty()) return null
+
+        return OpenGraphBook(
+            authors = getTagsContent(bookTags, "book:author"),
+            isbn = getFirstTagContent(bookTags, "book:isbn"),
+            releaseDate = getFirstTagContent(bookTags, "book:release_date"),
+            tags = getTagsContent(bookTags, "book:tag")
+        )
+    }
+
+    /**
+     * Groups structured tags (e.g. `image`, `image:width`, `image:height`) into one list per item.
+     *
+     * NOTE(review): no code in this class calls this function; the `build*` functions do their
+     * own positional grouping. Consider removing it if it is not going to be wired in.
+     *
+     * Grouping rules, as implemented:
+     * - A base tag is a tag whose property has no colon or ends with `:url`.
+     * - With more than one base tag, every base tag starts its own group, keyed by its position
+     *   among the base tags. Attribute tags carrying an explicit index (`...:<digits>:...`) go to
+     *   the base tag of the same namespace at that position. Attribute tags without an index go
+     *   to the first base tag of their namespace, and to the other ones as well only when none
+     *   of them mentions `width` or `height`.
+     * - Otherwise tags are grouped by explicit index, un-indexed tags are merged into group 0,
+     *   and when no tag is indexed everything ends up in group 0.
+     *
+     * @param tags the structured tags to group
+     * @return the groups keyed by item index; empty when [tags] is empty
+     */
+    private fun groupStructuredTags(tags: List<OpenGraphTag>): Map<Int, List<OpenGraphTag>> {
+        if (tags.isEmpty()) {
+            return emptyMap()
+        }
+
+        // A single tag without any attribute part is item 0 on its own.
+        if (tags.size == 1 && !tags[0].property.contains(":")) {
+            return mapOf(0 to tags)
+        }
+
+        // Compiled once per call; matches properties with an explicit index such as "image:1:width".
+        val indexedProperty = Regex(".*:(\\d+):.*")
+
+        fun hasExplicitIndex(tag: OpenGraphTag): Boolean = tag.property.matches(indexedProperty)
+
+        // Falls back to 0 when the digits cannot be read as an Int.
+        fun explicitIndex(tag: OpenGraphTag): Int =
+            indexedProperty.find(tag.property)?.groupValues?.get(1)?.toIntOrNull() ?: 0
+
+        // Base tags are the ones without a colon or ending in ":url"; everything else is an attribute.
+        val (baseTags, attributeTags) = tags.partition {
+            !it.property.contains(":") || it.property.endsWith(":url")
+        }
+
+        if (baseTags.size > 1) {
+            val result = mutableMapOf<Int, MutableList<OpenGraphTag>>()
+            baseTags.forEachIndexed { index, baseTag ->
+                result[index] = mutableListOf(baseTag)
+            }
+
+            // Attributes are handled per namespace (the text before the first colon).
+            attributeTags.groupBy { it.property.substringBefore(":") }.forEach { (baseProperty, attributes) ->
+                // Positions, within baseTags, of the base tags that belong to this namespace.
+                val baseIndices = baseTags.mapIndexedNotNull { index, tag ->
+                    if (tag.property == baseProperty || tag.property == "$baseProperty:url") index else null
+                }
+
+                val (indexedAttributes, nonIndexedAttributes) = attributes.partition { hasExplicitIndex(it) }
+
+                // Explicitly indexed attributes go to the base tag at that position, if there is one.
+                indexedAttributes.groupBy { explicitIndex(it) }.forEach { (attrIndex, attrs) ->
+                    if (attrIndex < baseIndices.size) {
+                        result.getOrPut(baseIndices[attrIndex]) { mutableListOf() }.addAll(attrs)
+                    }
+                }
+
+                // Without an index there is no way to tell which base tag a width/height belongs
+                // to, so size attributes are only attached to the first base tag of the namespace.
+                val hasSizeAttribute = nonIndexedAttributes.any {
+                    it.property.contains("width") || it.property.contains("height")
+                }
+                baseIndices.forEachIndexed { position, baseIndex ->
+                    if (position == 0 || !hasSizeAttribute) {
+                        result.getOrPut(baseIndex) { mutableListOf() }.addAll(nonIndexedAttributes)
+                    }
+                }
+            }
+
+            return result
+        }
+
+        // Zero or one base tag: group by explicit index and fold the un-indexed tags into item 0.
+        val (indexedTags, nonIndexedTags) = tags.partition { hasExplicitIndex(it) }
+        if (indexedTags.isEmpty()) {
+            return mapOf(0 to nonIndexedTags)
+        }
+
+        val result = indexedTags.groupBy { explicitIndex(it) }.toMutableMap()
+        if (nonIndexedTags.isNotEmpty()) {
+            result[0] = result[0].orEmpty() + nonIndexedTags
+        }
+        return result
+    }
+}
+
+// Using the existing OpenGraphTag class from Parser.kt
+
+/**
+ * Structured Open Graph data extracted from an HTML page.
+ *
+ * @property rawTags the flat list of tags this object was built from
+ * @property title content of the first `title` tag, or null when absent
+ * @property type content of the first `type` tag, or null when absent
+ * @property url content of the first `url` tag, or null when absent
+ * @property description content of the first `description` tag, or null when absent
+ * @property siteName content of the first `site_name` tag, or null when absent
+ * @property determiner content of the first `determiner` tag, or null when absent
+ * @property locale content of the first `locale` tag, or null when absent
+ * @property localeAlternate content of every `locale:alternate` tag
+ * @property images one entry per image base tag
+ * @property videos one entry per video base tag
+ * @property audios one entry per audio base tag
+ * @property article article metadata, or null when not available
+ * @property profile profile metadata, or null when not available
+ * @property book book metadata, or null when not available
+ */
+data class OpenGraphData(
+    val rawTags: List<OpenGraphTag>,
+
+    // Basic metadata
+    val title: String?,
+    val type: String?,
+    val url: String?,
+    val description: String?,
+    val siteName: String?,
+    val determiner: String?,
+    val locale: String?,
+    val localeAlternate: List<String>,
+
+    // Structured properties
+    val images: List<OpenGraphImage>,
+    val videos: List<OpenGraphVideo>,
+    val audios: List<OpenGraphAudio>,
+
+    // Optional type-specific metadata
+    val article: OpenGraphArticle?,
+    val profile: OpenGraphProfile?,
+    val book: OpenGraphBook?
+) {
+    /**
+     * Tells whether the four properties the Open Graph protocol requires are present:
+     * `og:title`, `og:type`, `og:image` and `og:url`.
+     *
+     * Only presence is checked; the values themselves are not validated.
+     *
+     * @return true when title, type and url are non-null and there is at least one image
+     */
+    fun isValid(): Boolean =
+        title != null && type != null && images.isNotEmpty() && url != null
+
+    /**
+     * Returns the URL of the first image.
+     *
+     * @return the first image's URL, or null when there is no image
+     */
+    fun getFirstImageUrl(): String? = images.firstOrNull()?.url
+}
+
+/**
+ * One Open Graph image together with its optional attributes.
+ *
+ * @property url content of the base image tag
+ * @property secureUrl content of `image:secure_url`, or null when absent
+ * @property type content of `image:type`, or null when absent
+ * @property width `image:width` as an integer, or null when absent or not a valid integer
+ * @property height `image:height` as an integer, or null when absent or not a valid integer
+ * @property alt content of `image:alt`, or null when absent
+ */
+data class OpenGraphImage(
+    val url: String?,
+    val secureUrl: String?,
+    val type: String?,
+    val width: Int?,
+    val height: Int?,
+    val alt: String?,
+)
+
+/**
+ * One Open Graph video together with its optional attributes.
+ *
+ * @property url content of the base video tag
+ * @property secureUrl content of `video:secure_url`, or null when absent
+ * @property type content of `video:type`, or null when absent
+ * @property width `video:width` as an integer, or null when absent or not a valid integer
+ * @property height `video:height` as an integer, or null when absent or not a valid integer
+ * @property duration `video:duration` as an integer, or null when absent or not a valid integer
+ */
+data class OpenGraphVideo(
+    val url: String?,
+    val secureUrl: String?,
+    val type: String?,
+    val width: Int?,
+    val height: Int?,
+    val duration: Int?,
+)
+
+/**
+ * One Open Graph audio together with its optional attributes.
+ *
+ * @property url content of the base audio tag
+ * @property secureUrl content of `audio:secure_url`, or null when absent
+ * @property type content of `audio:type`, or null when absent
+ */
+data class OpenGraphAudio(
+    val url: String?,
+    val secureUrl: String?,
+    val type: String?,
+)
+
+/**
+ * Open Graph metadata specific to the `article` type.
+ *
+ * The time fields hold the tag content as written on the page; they are not parsed.
+ *
+ * @property publishedTime content of `article:published_time`, or null when absent
+ * @property modifiedTime content of `article:modified_time`, or null when absent
+ * @property expirationTime content of `article:expiration_time`, or null when absent
+ * @property section content of `article:section`, or null when absent
+ * @property authors content of every `article:author` tag
+ * @property tags content of every `article:tag` tag
+ */
+data class OpenGraphArticle(
+    val publishedTime: String?,
+    val modifiedTime: String?,
+    val expirationTime: String?,
+    val section: String?,
+    val authors: List<String>,
+    val tags: List<String>
+)
+
+/**
+ * Open Graph metadata specific to the `profile` type.
+ *
+ * @property firstName content of `profile:first_name`, or null when absent
+ * @property lastName content of `profile:last_name`, or null when absent
+ * @property username content of `profile:username`, or null when absent
+ * @property gender content of `profile:gender`, or null when absent
+ */
+data class OpenGraphProfile(
+    val firstName: String?,
+    val lastName: String?,
+    val username: String?,
+    val gender: String?,
+)
+
+/**
+ * Open Graph metadata specific to the `book` type.
+ *
+ * @property authors content of every `book:author` tag
+ * @property isbn content of `book:isbn`, or null when absent
+ * @property releaseDate content of `book:release_date` as written on the page, or null when absent
+ * @property tags content of every `book:tag` tag
+ */
+data class OpenGraphBook(
+    val authors: List<String>,
+    val isbn: String?,
+    val releaseDate: String?,
+    val tags: List<String>
+)
diff --git a/src/main/kotlin/nl/lengrand/opengraphkt/Parser.kt b/src/main/kotlin/nl/lengrand/opengraphkt/Parser.kt
index 4fc88c0..f520c56 100644
--- a/src/main/kotlin/nl/lengrand/opengraphkt/Parser.kt
+++ b/src/main/kotlin/nl/lengrand/opengraphkt/Parser.kt
@@ -14,11 +14,21 @@ data class OpenGraph(
val rawTags: Elements,
val tags: List,
+ // Minimal
val title: String? = null,
val type: String? = null,
val image: String? = null, // Do we just take the first here? There might be several
val url: String? = null,
+ // Optional
+ val audio: String? = null,
+ val description: String? = null,
+ val determiner: String? = null,
+ val locale: String? = null,
+// val localeAlternate: List = emptyList(),
+ val siteName: String? = null,
+ val video: String? = null,
+
// TODO : Continue with more
){
/**
@@ -34,6 +44,11 @@ data class OpenGraph(
class Parser {
+    /**
+     * Looks up the `content` attribute of the `og:<tag>` meta element in [tags].
+     *
+     * @param tags the elements to search
+     * @param tag the Open Graph property name without the `og:` prefix, e.g. "title"
+     * @return the `content` value read from the matching elements, or null when nothing matches
+     */
+    private fun getTagContent(tags: Elements, tag: String) : String? {
+        // Evaluate the selector once and reuse the result for the emptiness check and the read.
+        val matches = tags.select("meta[property=og:${tag}]")
+        return if (matches.isEmpty()) null else matches.attr("content")
+    }
+
/**
* Extracts Open Graph tags from a JSoup Document
* Open Graph tags are meta tags with property attributes starting with "og:"
@@ -50,18 +65,20 @@ class Parser {
println(tags)
println(cleanTags)
- val title =
- if (tags.select("meta[property=og:title]").isEmpty()) null
- else tags.select("meta[property=og:title]").attr("content")
- val image =
- if (tags.select("meta[property=og:image]").isEmpty()) null
- else tags.select("meta[property=og:image]").attr("content")
- val url =
- if (tags.select("meta[property=og:url]").isEmpty()) null
- else tags.select("meta[property=og:url]").attr("content")
- val type =
- if (tags.select("meta[property=og:type]").isEmpty()) null
- else tags.select("meta[property=og:type]").attr("content")
+ // Minimal
+ val title = getTagContent(tags, "title")
+ val image = getTagContent(tags, "image")
+ val url = getTagContent(tags, "url")
+ val type = getTagContent(tags, "type")
+
+ // Optional
+ val audio = getTagContent(tags, "audio")
+ val description = getTagContent(tags, "description")
+ val determiner = getTagContent(tags, "determiner")
+ val locale = getTagContent(tags, "locale")
+ val siteName = getTagContent(tags, "site_name")
+ val video = getTagContent(tags, "video")
+
return OpenGraph(
tags,
@@ -69,7 +86,14 @@ class Parser {
title,
type,
image,
- url )
+ url,
+ audio,
+ description,
+ determiner,
+ locale,
+ siteName,
+ video
+ )
}
}
diff --git a/src/main/kotlin/nl/lengrand/opengraphkt/examples/OpenGraphParserExample.kt b/src/main/kotlin/nl/lengrand/opengraphkt/examples/OpenGraphParserExample.kt
new file mode 100644
index 0000000..27e7ab6
--- /dev/null
+++ b/src/main/kotlin/nl/lengrand/opengraphkt/examples/OpenGraphParserExample.kt
@@ -0,0 +1,156 @@
+package nl.lengrand.opengraphkt.examples
+
+import nl.lengrand.opengraphkt.OpenGraphParser
+import nl.lengrand.opengraphkt.nl.lengrand.opengraphkt.DocumentFetcher
+
+/**
+ * Example demonstrating how to use the OpenGraphParser to extract Open Graph data from HTML.
+ *
+ * Runs four scenarios in sequence and prints the parsed fields to standard output:
+ * a page loaded from a URL, a basic HTML string, a page with several images,
+ * and a page carrying article metadata.
+ */
+fun main() {
+ // Create instances of the parser and document fetcher
+ val parser = OpenGraphParser()
+ val fetcher = DocumentFetcher()
+
+ // Example 1: Parse Open Graph data from a URL
+ // NOTE(review): fromUrl presumably performs a network request - confirm in DocumentFetcher.
+ println("Example 1: Parsing from URL")
+ try {
+ val document = fetcher.fromUrl("https://www.imdb.com/title/tt0068646/")
+ val openGraphData = parser.parse(document)
+
+ println("Title: ${openGraphData.title}")
+ println("Type: ${openGraphData.type}")
+ println("URL: ${openGraphData.url}")
+ println("Description: ${openGraphData.description}")
+ println("Site Name: ${openGraphData.siteName}")
+
+ println("Images: ${openGraphData.images.size}")
+ openGraphData.images.forEachIndexed { index, image ->
+ println("Image ${index + 1}: ${image.url}")
+ println(" Width: ${image.width}")
+ println(" Height: ${image.height}")
+ println(" Alt: ${image.alt}")
+ }
+
+ println("Is valid: ${openGraphData.isValid()}")
+ } catch (e: Exception) {
+ // Any failure in this example is only reported, so the remaining examples still run.
+ println("Error parsing URL: ${e.message}")
+ }
+
+ // Example 2: Parse Open Graph data from an HTML string
+ println("\nExample 2: Parsing from HTML string")
+ val html = """
+
+
+
+ Open Graph Example
+
+
+
+
+
+
+
+
+
+
+ Example Page
+
+
+ """.trimIndent()
+
+ val document = fetcher.fromString(html)
+ val openGraphData = parser.parse(document)
+
+ println("Title: ${openGraphData.title}")
+ println("Type: ${openGraphData.type}")
+ println("URL: ${openGraphData.url}")
+ println("Description: ${openGraphData.description}")
+ println("Site Name: ${openGraphData.siteName}")
+
+ println("Images: ${openGraphData.images.size}")
+ openGraphData.images.forEachIndexed { index, image ->
+ println("Image ${index + 1}: ${image.url}")
+ println(" Width: ${image.width}")
+ println(" Height: ${image.height}")
+ }
+
+ println("Is valid: ${openGraphData.isValid()}")
+
+ // Example 3: Working with multiple images
+ println("\nExample 3: Working with multiple images")
+ val multipleImagesHtml = """
+
+
+
+ Multiple Images Example
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Photo Gallery
+
+
+ """.trimIndent()
+
+ val multipleImagesDocument = fetcher.fromString(multipleImagesHtml)
+ val multipleImagesData = parser.parse(multipleImagesDocument)
+
+ println("Title: ${multipleImagesData.title}")
+ println("Images: ${multipleImagesData.images.size}")
+ multipleImagesData.images.forEachIndexed { index, image ->
+ println("Image ${index + 1}: ${image.url}")
+ println(" Width: ${image.width}")
+ println(" Height: ${image.height}")
+ }
+
+ // Example 4: Working with article metadata
+ println("\nExample 4: Working with article metadata")
+ val articleHtml = """
+
+
+
+ Article Example
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Breaking News
+
+
+ """.trimIndent()
+
+ val articleDocument = fetcher.fromString(articleHtml)
+ val articleData = parser.parse(articleDocument)
+
+ println("Title: ${articleData.title}")
+ println("Type: ${articleData.type}")
+
+ // The article section can be null, so the article fields are only printed when it exists.
+ val article = articleData.article
+ if (article != null) {
+ println("Published Time: ${article.publishedTime}")
+ println("Modified Time: ${article.modifiedTime}")
+ println("Section: ${article.section}")
+ println("Authors: ${article.authors.joinToString(", ")}")
+ println("Tags: ${article.tags.joinToString(", ")}")
+ }
+}
\ No newline at end of file
diff --git a/src/test/kotlin/nl/lengrand/opengraphkt/OpenGraphParserTest.kt b/src/test/kotlin/nl/lengrand/opengraphkt/OpenGraphParserTest.kt
new file mode 100644
index 0000000..6f5be2e
--- /dev/null
+++ b/src/test/kotlin/nl/lengrand/opengraphkt/OpenGraphParserTest.kt
@@ -0,0 +1,281 @@
+package nl.lengrand.opengraphkt
+
+import nl.lengrand.opengraphkt.nl.lengrand.opengraphkt.DocumentFetcher
+import org.junit.jupiter.api.Test
+import kotlin.test.assertEquals
+import kotlin.test.assertFalse
+import kotlin.test.assertNotNull
+import kotlin.test.assertTrue
+
+class OpenGraphParserTest {
+
+ private val parser = OpenGraphParser()
+ private val fetcher = DocumentFetcher()
+
+ // Sample HTML with all required OpenGraph tags and some structured properties
+ private val completeHtml = """
+
+
+
+ Open Graph Example
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Example Page
+
+
+ """.trimIndent()
+
+ // Sample HTML with article-specific tags
+ private val articleHtml = """
+
+
+
+ Article Example
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Breaking News
+
+
+ """.trimIndent()
+
+ // Sample HTML with profile-specific tags
+ private val profileHtml = """
+
+
+
+ Profile Example
+
+
+
+
+
+
+
+
+
+
+
+ John Doe
+
+
+ """.trimIndent()
+
+ // Sample HTML with book-specific tags
+ private val bookHtml = """
+
+
+
+ Book Example
+
+
+
+
+
+
+
+
+
+
+
+
+ The Great Novel
+
+
+ """.trimIndent()
+
+ // Sample HTML with multiple images
+ private val multipleImagesHtml = """
+
+
+
+ Multiple Images Example
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Photo Gallery
+
+
+ """.trimIndent()
+
+    /** Checks basic, image, video, audio and locale properties parsed from the complete fixture. */
+    @Test
+    fun `test parse with complete OpenGraph tags`() {
+        val data = parser.parse(fetcher.fromString(completeHtml))
+
+        // Required properties
+        assertEquals("The Rock", data.title)
+        assertEquals("video.movie", data.type)
+        assertEquals("https://example.com/the-rock", data.url)
+        assertTrue(data.isValid())
+
+        // Number of raw tags extracted from the fixture
+        assertEquals(18, data.rawTags.size)
+
+        // Image and its attributes
+        assertEquals(1, data.images.size)
+        with(data.images[0]) {
+            assertEquals("https://example.com/rock.jpg", url)
+            assertEquals(300, width)
+            assertEquals(200, height)
+            assertEquals("A promotional image for The Rock", alt)
+        }
+
+        // Video and its attributes
+        assertEquals(1, data.videos.size)
+        with(data.videos[0]) {
+            assertEquals("https://example.com/rock-trailer.mp4", url)
+            assertEquals(1280, width)
+            assertEquals(720, height)
+            assertEquals("video/mp4", type)
+        }
+
+        // Audio and its attributes
+        assertEquals(1, data.audios.size)
+        with(data.audios[0]) {
+            assertEquals("https://example.com/rock-theme.mp3", url)
+            assertEquals("audio/mpeg", type)
+        }
+
+        // Locale and its alternates
+        assertEquals("en_US", data.locale)
+        assertEquals(2, data.localeAlternate.size)
+        assertTrue("fr_FR" in data.localeAlternate)
+        assertTrue("es_ES" in data.localeAlternate)
+    }
+
+    /** Checks that an "article" page yields its basic properties and a populated article section. */
+    @Test
+    fun `test parse with article-specific tags`() {
+        val data = parser.parse(fetcher.fromString(articleHtml))
+
+        // Basic properties
+        assertEquals("Breaking News", data.title)
+        assertEquals("article", data.type)
+        assertEquals("https://example.com/news/breaking", data.url)
+        assertEquals("Latest breaking news", data.description)
+
+        // Article section: assertNotNull hands back the non-null value for the checks below
+        val article = assertNotNull(data.article)
+        assertEquals("2023-01-01T00:00:00Z", article.publishedTime)
+        assertEquals("2023-01-02T12:00:00Z", article.modifiedTime)
+        assertEquals("News", article.section)
+        assertEquals(2, article.authors.size)
+        assertTrue("John Doe" in article.authors)
+        assertTrue("Jane Smith" in article.authors)
+        assertEquals(2, article.tags.size)
+        assertTrue("breaking" in article.tags)
+        assertTrue("news" in article.tags)
+    }
+
+    /** Checks that a "profile" page yields its basic properties and a populated profile section. */
+    @Test
+    fun `test parse with profile-specific tags`() {
+        val data = parser.parse(fetcher.fromString(profileHtml))
+
+        // Basic properties
+        assertEquals("John Doe", data.title)
+        assertEquals("profile", data.type)
+        assertEquals("https://example.com/profile/johndoe", data.url)
+        assertEquals("John Doe's profile", data.description)
+
+        // Profile section: assertNotNull hands back the non-null value for the checks below
+        val profile = assertNotNull(data.profile)
+        assertEquals("John", profile.firstName)
+        assertEquals("Doe", profile.lastName)
+        assertEquals("johndoe", profile.username)
+        assertEquals("male", profile.gender)
+    }
+
+    /** Checks that a "book" page yields its basic properties and a populated book section. */
+    @Test
+    fun `test parse with book-specific tags`() {
+        val data = parser.parse(fetcher.fromString(bookHtml))
+
+        // Basic properties
+        assertEquals("The Great Novel", data.title)
+        assertEquals("book", data.type)
+        assertEquals("https://example.com/books/great-novel", data.url)
+        assertEquals("A great novel", data.description)
+
+        // Book section: assertNotNull hands back the non-null value for the checks below
+        val book = assertNotNull(data.book)
+        assertEquals(1, book.authors.size)
+        assertEquals("Famous Author", book.authors[0])
+        assertEquals("1234567890123", book.isbn)
+        assertEquals("2023-01-01", book.releaseDate)
+        assertEquals(2, book.tags.size)
+        assertTrue("fiction" in book.tags)
+        assertTrue("novel" in book.tags)
+    }
+
+    /** Checks that each of three images keeps its own URL, width and height. */
+    @Test
+    fun `test parse with multiple images`() {
+        val data = parser.parse(fetcher.fromString(multipleImagesHtml))
+
+        // Basic properties
+        assertEquals("Photo Gallery", data.title)
+        assertEquals("website", data.type)
+        assertEquals("https://example.com/gallery", data.url)
+        assertEquals("A gallery of images", data.description)
+
+        // Expected (url, width, height) of every image, in document order
+        val expectedImages = listOf(
+            Triple("https://example.com/image1.jpg", 800, 600),
+            Triple("https://example.com/image2.jpg", 1024, 768),
+            Triple("https://example.com/image3.jpg", 1200, 900),
+        )
+
+        assertEquals(3, data.images.size)
+        expectedImages.forEachIndexed { index, (expectedUrl, expectedWidth, expectedHeight) ->
+            val image = data.images[index]
+            assertEquals(expectedUrl, image.url)
+            assertEquals(expectedWidth, image.width)
+            assertEquals(expectedHeight, image.height)
+        }
+    }
+}
\ No newline at end of file