Better naming and add episodes table

This commit is contained in:
Julien Lengrand-Lambert
2020-03-11 09:06:06 +01:00
parent 9c6a42ca75
commit 0d2be018c2
13 changed files with 120 additions and 21 deletions

View File

@@ -19,6 +19,8 @@ See [LICENSE](/LICENSE)
* `Database.connect("jdbc:mysql://localhost:3308/imdb?useSSL=false", driver = "com.mysql.jdbc.Driver", user = "root", password = "aRootPassword")`
* Use `rewriteBatchedStatements=true` when inserting large volumes of data to have your driver rewrite your query
* `.map` seems to keep memory on the stack while a `for` loop doesn't? I get an `OutOfMemoryError` when running with `map`
* reason to use partitions
* DSL loading code is copy-paste prone
## Author

View File

@@ -1,13 +1,8 @@
package loader
import dsl.TitleRatings
import dsl.Titles
import loader.CrewsLoader
import loader.EpisodesLoader
import org.jetbrains.exposed.sql.Database
import org.jetbrains.exposed.sql.SchemaUtils
import org.jetbrains.exposed.sql.statements.BatchInsertStatement
import org.jetbrains.exposed.sql.transactions.transaction
import tsv.Reader
import kotlin.system.measureTimeMillis
fun main() {
@@ -20,6 +15,7 @@ fun main() {
password = ""
)
transaction(db) { SchemaUtils.dropDatabase("imdb") }
transaction(db) { SchemaUtils.createDatabase("imdb") }
db = Database.connect(
@@ -32,5 +28,7 @@ fun main() {
// TitleRatingsLoader.load(db)
// TitleBasicsLoader.load(db)
NameBasicsLoader.load(db)
// NameBasicsLoader.load(db)
// CrewsLoader.load(db)
EpisodesLoader.load(db)
}

View File

@@ -0,0 +1,14 @@
package dsl
import org.jetbrains.exposed.sql.Column
import org.jetbrains.exposed.sql.Table
import org.jetbrains.exposed.sql.statements.api.ExposedBlob
/**
 * Exposed [Table] declaring three columns: `tconst`, `directors` and `writers`.
 *
 * `tconst` carries a unique index and is also declared as the primary key.
 */
object Crew : Table() {
    // Identifier column, limited to 10 characters.
    val tconst: Column<String> = varchar(name = "tconst", length = 10).uniqueIndex()

    // TODO: better
    val directors: Column<String> = text(name = "directors")

    // TODO: better
    val writers: Column<String> = text(name = "writers")

    override val primaryKey = PrimaryKey(tconst, name = "tconst")
}

View File

@@ -0,0 +1,14 @@
package dsl
import org.jetbrains.exposed.sql.Column
import org.jetbrains.exposed.sql.Table
/**
 * Exposed [Table] declaring four columns: `tconst`, `parentTconst`,
 * `seasonNumber` and `episodeNumber`.
 *
 * `tconst` carries a unique index and is also declared as the primary key.
 * Both number columns are nullable.
 */
object Episodes : Table() {
    // Identifier column, limited to 10 characters.
    val tconst: Column<String> = varchar(name = "tconst", length = 10).uniqueIndex()

    // Second identifier column with the same length limit as tconst.
    val parentTconst: Column<String> = varchar(name = "parentTconst", length = 10)

    val seasonNumber: Column<Int?> = integer(name = "seasonNumber").nullable()

    val episodeNumber: Column<Int?> = integer(name = "episodeNumber").nullable()

    override val primaryKey = PrimaryKey(tconst, name = "tconst")
}

View File

@@ -9,7 +9,7 @@ object Names : Table(){
val birthYear : Column<Int?> = integer("birthYear").nullable()
val deathYear : Column<Int?> = integer("deathYear").nullable()
val primaryProfession : Column<String> = varchar("primaryProfession", 500)
val knownForTitles : Column<String> = varchar("knownForTitles", 200) // Improve!
val knownForTitles : Column<String> = varchar("knownForTitles", 200) // TODO: Improve!
override val primaryKey = PrimaryKey(nconst, name = "nconst")
}

View File

@@ -3,7 +3,7 @@ package dsl
import org.jetbrains.exposed.sql.Column
import org.jetbrains.exposed.sql.Table
object TitleRatings : Table(){
object Ratings : Table(){
val tconst : Column<String> = varchar("tconst", 10).uniqueIndex()
val averageRating : Column<Float?> = float("averageRating").nullable()
val numVotes : Column<Int?> = integer("numVotes").nullable()

View File

@@ -5,14 +5,14 @@ import org.jetbrains.exposed.sql.Table
object Titles : Table(){
val tconst : Column<String> = varchar("tconst", 10).uniqueIndex()
val titleType : Column<String> = varchar("titleType", 50) // Own Table?
val titleType : Column<String> = varchar("titleType", 50) // TODO: Own Table?
val primaryTitle : Column<String> = varchar("primaryTitle", 500)
val originalTitle : Column<String> = varchar("originalTitle", 500)
val isAdult : Column<Boolean> = bool("isAdult")
val startYear : Column<Int?> = integer("startYear").nullable()
val endYear : Column<Int?> = integer("endYear").nullable()
val runtimeMinutes : Column<Long?> = long("runtimeMinutes").nullable()
val genres : Column<String> = varchar("genres", 50) // Own Table?
val genres : Column<String> = varchar("genres", 50) // TODO: Own Table?
override val primaryKey = PrimaryKey(tconst, name = "tconst")
}

View File

@@ -0,0 +1,35 @@
package loader
import dsl.Crew
import org.jetbrains.exposed.sql.Database
import org.jetbrains.exposed.sql.statements.BatchInsertStatement
import kotlin.system.measureTimeMillis
/**
 * Entry point for loading `./datasets/title.crew.tsv` into the [Crew] table.
 */
object CrewsLoader {
    /**
     * Hands the crew dataset to [TableLoader.process] using [db], then prints
     * the elapsed wall-clock time as whole minutes and seconds.
     */
    fun load(db: Database) {
        println("Loading Crews")

        val elapsedMillis = measureTimeMillis { // duplication
            TableLoader.process(
                db,
                Crew,
                "./datasets/title.crew.tsv",
                5000, // presumably the batch size; confirm against TableLoader.process
                insert()
            )
        }

        // Integer division drops the sub-second remainder before splitting into minutes/seconds.
        val totalSeconds = elapsedMillis / 1000
        val minutes = totalSeconds / 60
        val seconds = totalSeconds % 60
        println("Time was : $minutes minutes $seconds seconds")
    }
}
/**
 * Returns the action applied to each input line: the line is split on tab
 * characters and its first three fields are written to the [Crew] columns
 * `tconst`, `directors` and `writers`, in that order.
 */
private fun insert(): BatchInsertStatement.(String) -> Unit = { line ->
    val fields = line.split("\t")
    this[Crew.tconst] = fields[0]
    this[Crew.directors] = fields[1]
    this[Crew.writers] = fields[2]
}

View File

@@ -0,0 +1,36 @@
package loader
import dsl.Episodes
import org.jetbrains.exposed.sql.Database
import org.jetbrains.exposed.sql.statements.BatchInsertStatement
import tsv.Reader
import kotlin.system.measureTimeMillis
/**
 * Entry point for loading `./datasets/title.episode.tsv` into the [Episodes] table.
 */
object EpisodesLoader {
    /**
     * Hands the episode dataset to [TableLoader.process] using [db], then prints
     * the elapsed wall-clock time as whole minutes and seconds.
     */
    fun load(db: Database) {
        println("Loading Episodes")

        val elapsedMillis = measureTimeMillis { // duplication
            TableLoader.process(
                db,
                Episodes,
                "./datasets/title.episode.tsv",
                1000, // presumably the batch size; confirm against TableLoader.process
                insert()
            )
        }

        // Integer division drops the sub-second remainder before splitting into minutes/seconds.
        val totalSeconds = elapsedMillis / 1000
        val minutes = totalSeconds / 60
        val seconds = totalSeconds % 60
        println("Time was : $minutes minutes $seconds seconds")
    }
}
/**
 * Returns the action applied to each input line: the line is split on tab
 * characters and its first four fields are written to the [Episodes] columns.
 *
 * The third and fourth fields are parsed as integers unless they equal
 * [Reader.NO_DATA], in which case `null` is stored instead.
 */
private fun insert(): BatchInsertStatement.(String) -> Unit = { line ->
    val fields = line.split("\t")
    this[Episodes.tconst] = fields[0]
    this[Episodes.parentTconst] = fields[1]
    this[Episodes.seasonNumber] = fields[2].takeUnless { it == Reader.NO_DATA }?.toInt()
    this[Episodes.episodeNumber] = fields[3].takeUnless { it == Reader.NO_DATA }?.toInt()
}

View File

@@ -10,7 +10,7 @@ import org.jetbrains.exposed.sql.statements.BatchInsertStatement
import tsv.Reader
import kotlin.system.measureTimeMillis
object NameBasicsLoader{
object NameLoader{
fun load(db: Database){
println("Loading Names Basics")

View File

@@ -1,13 +1,13 @@
package loader
import dsl.TitleRatings
import dsl.Ratings
import org.jetbrains.exposed.sql.Database
import org.jetbrains.exposed.sql.statements.BatchInsertStatement
import tsv.Reader
import kotlin.system.measureTimeMillis
object TitleRatingsLoader {
object RatingsLoader {
fun load(db: Database){
println("Loading Title Ratings")
@@ -15,7 +15,7 @@ object TitleRatingsLoader {
val time = measureTimeMillis() { // duplication
TableLoader.process(db,
TitleRatings,
Ratings,
"./datasets/title.ratings.tsv",
1,
insert()
@@ -30,8 +30,8 @@ private fun insert(): BatchInsertStatement.(String) -> Unit {
return {
val items = it.split("\t")
this[TitleRatings.tconst] = items[0]
this[TitleRatings.averageRating] = if (items[1] != Reader.NO_DATA) items[1].toFloat() else null
this[TitleRatings.numVotes] = if (items[2] != Reader.NO_DATA) items[2].toInt() else null
this[Ratings.tconst] = items[0]
this[Ratings.averageRating] = if (items[1] != Reader.NO_DATA) items[1].toFloat() else null
this[Ratings.numVotes] = if (items[2] != Reader.NO_DATA) items[2].toInt() else null
}
}

View File

@@ -6,7 +6,7 @@ import org.jetbrains.exposed.sql.statements.BatchInsertStatement
import tsv.Reader
import kotlin.system.measureTimeMillis
object TitleBasicsLoader{
object TitleLoader{
fun load(db: Database){
println("Loading Title Basics")

View File

@@ -18,7 +18,7 @@ object Cities : Table() {
override val primaryKey = PrimaryKey(id, name = "PK_Cities_ID")
}
fun main() {
fun main2() {
Database.connect("jdbc:h2:mem:test", driver = "org.h2.Driver", user = "root", password = "")
transaction {