From cf7839c8a7b6d1d937275cfd15ccc81121b2dc06 Mon Sep 17 00:00:00 2001 From: Julien Lengrand-Lambert Date: Fri, 29 May 2020 15:43:18 +0200 Subject: [PATCH] last commit --- .idea/misc.xml | 3 + README.md | 59 ++----------- .../kotlin/nl/lengrand/imdb/HeavyQueries.kt | 84 +++++-------------- src/main/kotlin/nl/lengrand/imdb/LoadImdb.kt | 22 ++--- .../kotlin/nl/lengrand/imdb/api/ImdbServer.kt | 1 - .../kotlin/nl/lengrand/imdb/api/NamesApi.kt | 8 +- src/main/kotlin/nl/lengrand/imdb/dsl/Names.kt | 22 ++++- .../nl/lengrand/imdb/queries/RandomQueries.kt | 6 +- 8 files changed, 70 insertions(+), 135 deletions(-) diff --git a/.idea/misc.xml b/.idea/misc.xml index 2c38384..0d8e194 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -8,6 +8,9 @@ + + diff --git a/README.md b/README.md index 0ae3c11..a33027e 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # Exposed Imdb +**NOTE : I am archiving this project because imdb [closed access to its datasets](https://developer.imdb.com/) and I could not find any other complete equivalents online. Sorry!** + A simple project to learn more about [exposed](https://github.com/JetBrains/Exposed), using the [imdb datasets](https://datasets.imdbws.com/). Please note that if you use this project, you have to comply with the [Imbd license](https://www.imdb.com/interfaces/). @@ -10,61 +12,16 @@ In short, [CC BY-NC-SA 4.0](https://tldrlegal.com/license/creative-commons-attri See [LICENSE](/LICENSE) +## Running the app + +``` +$ docker-compose up to get the database running +``` + ## Learnings - -* Use `;DB_CLOSE_DELAY=-1` if you want to persist the in-memory database information over more than a single transaction. - * `Database.connect("jdbc:h2:mem:test;DB_CLOSE_DELAY=-1", driver = "org.h2.Driver", user = "root", password = "")` -* Use `?useSSL=false` to avoid SSL exceptions (for dev only!) on MySQL. - * `Database.connect("jdbc:mysql://localhost:3308/imdb?useSSL=false", driver = "com.mysql.jdbc.Driver", user = "root", password = "aRootPassword")` -* Use `rewriteBatchedStatements=true` when inserting large volumes of data to have your driver rewrite your query * `.map` keeps stack of memory while `for` loop doesn't? I get a OME when running with map * reason to use partitions -* dsl loading copy paste prone -* Cross Join -* references - -``` -1 -[dsl.Titles.primaryTitle=The Lego Batman Movie, dsl.Titles.titleType=movie, dsl.Ratings.averageRating=7.3, dsl.Ratings.numVotes=123790] -dsl.Titles.primaryTitle=The Lego Batman Movie, dsl.Titles.titleType=movie, dsl.Ratings.averageRating=7.3, dsl.Ratings.numVotes=123790 -Ratings query took : 3170 -1 -[dsl.Titles.primaryTitle=The Lego Batman Movie, dsl.Titles.titleType=movie, dsl.Ratings.averageRating=7.3, dsl.Ratings.numVotes=123790] -dsl.Titles.primaryTitle=The Lego Batman Movie, dsl.Titles.titleType=movie, dsl.Ratings.averageRating=7.3, dsl.Ratings.numVotes=123790 -Ratings query took : 2159 - -object Titles : IntIdTable() { - val tconst: Column = (varchar("tconst", 10) references Ratings.tconst).uniqueIndex() - val titleType: Column = varchar("titleType", 50) // TODO: Own Table? - val primaryTitle: Column = varchar("primaryTitle", 500) - val originalTitle: Column = varchar("originalTitle", 500) - val isAdult: Column = bool("isAdult") - val startYear: Column = integer("startYear").nullable() - val endYear: Column = integer("endYear").nullable() - val runtimeMinutes: Column = long("runtimeMinutes").nullable() - val genres: Column = varchar("genres", 50) // TODO: Own Table? -} - -object Ratings : IntIdTable(){ - val tconst : Column = varchar("tconst", 10).uniqueIndex() - val averageRating : Column = float("averageRating").nullable() - val numVotes : Column = integer("numVotes").nullable() -} - - println("Ratings query took : ${measureTimeMillis() { - transaction(db) { - var result = (Titles innerJoin Ratings).slice(Titles.primaryTitle, Titles.titleType, Ratings.averageRating, Ratings.numVotes).select { - ((Titles.primaryTitle like "The Lego Batman Movie") and (Titles.titleType like "movie") - and Titles.tconst.eq(Ratings.tconst)) - }.toList() - - println(result.size) - println(result) - println(result.first()) - } - }}"); -``` ## Author diff --git a/src/main/kotlin/nl/lengrand/imdb/HeavyQueries.kt b/src/main/kotlin/nl/lengrand/imdb/HeavyQueries.kt index bd16428..0c5b715 100644 --- a/src/main/kotlin/nl/lengrand/imdb/HeavyQueries.kt +++ b/src/main/kotlin/nl/lengrand/imdb/HeavyQueries.kt @@ -1,9 +1,14 @@ -package nl.lengrand.imdb.queries +package nl.lengrand.imdb -import nl.lengrand.imdb.dsl.Names +//-XX:StartFlightRecording=duration=6000s,filename=myheavyrecording.jfr +import nl.lengrand.imdb.dsl.Ratings +import nl.lengrand.imdb.dsl.Titles +import nl.lengrand.imdb.dsl.Titles.primaryTitle import org.jetbrains.exposed.sql.Database +import org.jetbrains.exposed.sql.and import org.jetbrains.exposed.sql.select import org.jetbrains.exposed.sql.transactions.transaction +import kotlin.system.measureTimeMillis fun main(){ val db = Database.connect( @@ -13,64 +18,21 @@ fun main(){ password = "" ) -// Find a actors with a given name - transaction(db) { - var result = Names.select { Names.primaryName like "%cotillard" }.toList() - println(result.size) + + repeat(100){ + println("Ratings query took : ${measureTimeMillis() { + transaction(db) { + var result = (Titles crossJoin Ratings).slice(Titles.primaryTitle, Titles.titleType, Ratings.averageRating, Ratings.numVotes).select { + ((Titles.primaryTitle like "%batman%") and (Titles.titleType like "movie") + and Titles.tconst.eq(Ratings.tconst)) + } + .orderBy(Ratings.averageRating) + .toList() + + println(result.size) + println(result) + println(result.last()[primaryTitle]) + } + }}"); } - - // Trying with DAO - - - // Find all the movies someone played in - // Not possible currently, need join table - - // Find all the batman movies -// transaction(db) { -// var result = Titles.select { Titles.primaryTitle like "%batman%"}.toList() -// println(result.size) -// println(result) -// println(result.first()) -// } - - // Find the rating for a specific movie -// println("Ratings query took : ${measureTimeMillis() { -// transaction(db) { -// var result = (Titles crossJoin Ratings).slice(Titles.primaryTitle, Titles.titleType, Ratings.averageRating, Ratings.numVotes).select { -// ((Titles.primaryTitle like "The Lego Batman Movie") and (Titles.titleType like "movie") -// and Titles.tconst.eq(Ratings.tconst)) -// }.toList() -// -// println(result.size) -// println(result) -// println(result.first()) -// } -// }}"); - -// println("Ratings query took : ${measureTimeMillis() { -// transaction(db) { -// var result = (Titles innerJoin Ratings).slice(Titles.primaryTitle, Titles.titleType, Ratings.averageRating, Ratings.numVotes).select { -// ((Titles.primaryTitle like "The Lego Batman Movie") and (Titles.titleType like "movie") -// and Titles.tconst.eq(Ratings.tconst)) -// }.toList() -// -// println(result.size) -// println(result) -// println(result.first()) -// } -// }}"); - -// println("Ratings query took : ${measureTimeMillis() { -// transaction(db) { -// var result = (Titles innerJoin Ratings).slice(Titles.primaryTitle, Titles.titleType, Ratings.averageRating, Ratings.numVotes).select { -// ((Titles.primaryTitle like "%batman%") and (Titles.titleType like "movie") -// and Titles.tconst.eq(Ratings.tconst)) -// }.orderBy(Ratings.averageRating) -// .toList() -// -// println(result.size) -// println(result) -// println(result.last()) -// } -// }}"); } \ No newline at end of file diff --git a/src/main/kotlin/nl/lengrand/imdb/LoadImdb.kt b/src/main/kotlin/nl/lengrand/imdb/LoadImdb.kt index eae10ed..3d3f231 100644 --- a/src/main/kotlin/nl/lengrand/imdb/LoadImdb.kt +++ b/src/main/kotlin/nl/lengrand/imdb/LoadImdb.kt @@ -1,5 +1,8 @@ package nl.lengrand.imdb +import nl.lengrand.imdb.loader.NamesLoader +import nl.lengrand.imdb.loader.RatingsLoader +import nl.lengrand.imdb.loader.TitleLoader import org.jetbrains.exposed.sql.Database import org.jetbrains.exposed.sql.SchemaUtils import org.jetbrains.exposed.sql.transactions.transaction @@ -8,32 +11,30 @@ import kotlin.system.measureTimeMillis fun main() { // var db = Database.connect("jdbc:h2:mem:test;DB_CLOSE_DELAY=-1", driver = "org.h2.Driver", user = "root", password = "") -// var db = Database.connect("jdbc:mysql://localhost:3308?useSSL=false&allowPublicKeyRetrieval=true&rewriteBatchedStatements=true", driver = "com.mysql.jdbc.Driver", user = "root", password = "aRootPassword") var db = Database.connect( - "jdbc:mysql://localhost:3306?useSSL=false&allowPublicKeyRetrieval=true&rewriteBatchedStatements=true", + "jdbc:mysql://localhost:3308?useSSL=false&allowPublicKeyRetrieval=true&rewriteBatchedStatements=true", driver = "com.mysql.jdbc.Driver", user = "root", - password = "" + password = "aRootPassword" ) -// transaction(db) { SchemaUtils.dropDatabase("imdb") } + transaction(db) { SchemaUtils.dropDatabase("imdb") } transaction(db) { SchemaUtils.createDatabase("imdb") } db = Database.connect( - "jdbc:mysql://localhost:3306/imdb?useSSL=false&allowPublicKeyRetrieval=true&rewriteBatchedStatements=true", + "jdbc:mysql://localhost:3308/imdb?useSSL=false&allowPublicKeyRetrieval=true&rewriteBatchedStatements=true", driver = "com.mysql.jdbc.Driver", user = "root", - password = "" + password = "aRootPassword" ) -// db = Database.connect("jdbc:mysql://localhost:3308/imdb?useSSL=false&allowPublicKeyRetrieval=true&rewriteBatchedStatements=true", driver = "com.mysql.jdbc.Driver", user = "root", password = "aRootPassword") val time = measureTimeMillis() { // duplication // TODO: Avoid overwriting -// RatingsLoader.load(db) -// TitleLoader.load(db) -// NamesLoader.load(db) + RatingsLoader.load(db) + TitleLoader.load(db) + NamesLoader.load(db) // CrewsLoader.load(db) // EpisodesLoader.load(db) // PrincipalsLoader.load(db) // Time was : 21 minutes 23 seconds @@ -41,5 +42,4 @@ fun main() { } println("---- Total time") println("Total Time was : ${time / 1000 / 60 } minutes ${time / 1000 % 60 } seconds") - } \ No newline at end of file diff --git a/src/main/kotlin/nl/lengrand/imdb/api/ImdbServer.kt b/src/main/kotlin/nl/lengrand/imdb/api/ImdbServer.kt index a774616..dcc09e1 100644 --- a/src/main/kotlin/nl/lengrand/imdb/api/ImdbServer.kt +++ b/src/main/kotlin/nl/lengrand/imdb/api/ImdbServer.kt @@ -6,6 +6,5 @@ fun main(){ Vertx.vertx().createHttpServer() .requestHandler { request -> request.response().end("Imdb Server") - }.listen(8080) } \ No newline at end of file diff --git a/src/main/kotlin/nl/lengrand/imdb/api/NamesApi.kt b/src/main/kotlin/nl/lengrand/imdb/api/NamesApi.kt index 695ad57..229fbe0 100644 --- a/src/main/kotlin/nl/lengrand/imdb/api/NamesApi.kt +++ b/src/main/kotlin/nl/lengrand/imdb/api/NamesApi.kt @@ -1,12 +1,16 @@ package nl.lengrand.imdb.api import nl.lengrand.imdb.dsl.Names +import org.jetbrains.exposed.sql.Database import org.jetbrains.exposed.sql.select +import org.jetbrains.exposed.sql.transactions.transaction class NamesApi { companion object{ - fun get(searchTerm: String){ - Names.select { Names.primaryName like "%cotillard" }.toList() + fun get(db: Database, searchTerm: String){ + transaction(db) { + Names.select { Names.primaryName like searchTerm }.toList() + } } } } \ No newline at end of file diff --git a/src/main/kotlin/nl/lengrand/imdb/dsl/Names.kt b/src/main/kotlin/nl/lengrand/imdb/dsl/Names.kt index cfefea8..53d9373 100644 --- a/src/main/kotlin/nl/lengrand/imdb/dsl/Names.kt +++ b/src/main/kotlin/nl/lengrand/imdb/dsl/Names.kt @@ -1,5 +1,8 @@ package nl.lengrand.imdb.dsl +import org.jetbrains.exposed.dao.IntEntity +import org.jetbrains.exposed.dao.IntEntityClass +import org.jetbrains.exposed.dao.id.EntityID import org.jetbrains.exposed.dao.id.IntIdTable import org.jetbrains.exposed.sql.Table @@ -11,10 +14,21 @@ object Names : IntIdTable(){ val primaryProfession = varchar("primaryProfession", 500) } -object KnownForTitles : Table(){ - val id = integer("id").autoIncrement() // Column +class Name(id: EntityID) : IntEntity(id) { + companion object : IntEntityClass(Names) + + var nconst by Names.nconst + var primaryName by Names.primaryName + var birthYear by Names.birthYear + var deathYear by Names.deathYear + var primaryProfession by Names.primaryProfession +} + +object KnownForTitles : IntIdTable(){ +// val id = integer("id").autoIncrement() // Column val nconst = varchar("nconst", 10).index("knownfor_names") val tconst = (varchar("tconst", 10) references Titles.tconst).index("knownfor_titles") - override val primaryKey = PrimaryKey(id) -} \ No newline at end of file +// override val primaryKey = PrimaryKey(id) +} + diff --git a/src/main/kotlin/nl/lengrand/imdb/queries/RandomQueries.kt b/src/main/kotlin/nl/lengrand/imdb/queries/RandomQueries.kt index 8e74260..bd16428 100644 --- a/src/main/kotlin/nl/lengrand/imdb/queries/RandomQueries.kt +++ b/src/main/kotlin/nl/lengrand/imdb/queries/RandomQueries.kt @@ -1,13 +1,9 @@ -package nl.lengrand.imdb +package nl.lengrand.imdb.queries import nl.lengrand.imdb.dsl.Names -import nl.lengrand.imdb.dsl.Ratings -import nl.lengrand.imdb.dsl.Titles import org.jetbrains.exposed.sql.Database -import org.jetbrains.exposed.sql.and import org.jetbrains.exposed.sql.select import org.jetbrains.exposed.sql.transactions.transaction -import kotlin.system.measureTimeMillis fun main(){ val db = Database.connect(