From cc1c8da1494bb5cfaaac2717a8fe93b68671cb78 Mon Sep 17 00:00:00 2001 From: Julien Lengrand-Lambert Date: Thu, 5 Mar 2020 20:55:53 +0100 Subject: [PATCH] Using batch insert --- README.md | 3 ++- docker-compose.yml | 4 +++ src/main/kotlin/dsl/TitleRatings.kt | 19 ++++---------- src/main/kotlin/loader/LoadImdb.kt | 4 +-- src/main/kotlin/loader/TitleRatingsLoader.kt | 26 +++++++++++++++++++- 5 files changed, 38 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 4adfd6a..a8bb2e8 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,8 @@ See [LICENSE](/LICENSE) * Use `;DB_CLOSE_DELAY=-1` if you want to persist the in-memory database information over more than a single transaction. * `Database.connect("jdbc:h2:mem:test;DB_CLOSE_DELAY=-1", driver = "org.h2.Driver", user = "root", password = "")` - +* Use `?useSSL=false` to avoid SSL exceptions (for dev only!) on MySQL. + * `Database.connect("jdbc:mysql://localhost:3308/imdb?useSSL=false", driver = "com.mysql.jdbc.Driver", user = "root", password = "aRootPassword")` ## Author diff --git a/docker-compose.yml b/docker-compose.yml index 26643a7..9ec7f22 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -9,3 +9,7 @@ services: MYSQL_DATABASE: imdb ports: - "3308:3306" + volumes: + - my-datavolume:/var/lib/mysql +volumes: + my-datavolume: diff --git a/src/main/kotlin/dsl/TitleRatings.kt b/src/main/kotlin/dsl/TitleRatings.kt index 7b1308d..8092910 100644 --- a/src/main/kotlin/dsl/TitleRatings.kt +++ b/src/main/kotlin/dsl/TitleRatings.kt @@ -1,19 +1,18 @@ package dsl -import org.jetbrains.exposed.dao.IntEntity -import org.jetbrains.exposed.dao.IntEntityClass -import org.jetbrains.exposed.dao.id.EntityID -import org.jetbrains.exposed.dao.id.IntIdTable import org.jetbrains.exposed.sql.Column import org.jetbrains.exposed.sql.Table import org.jetbrains.exposed.sql.insert +import org.jetbrains.exposed.sql.insertIgnore import tsv.Reader.NO_DATA object TitleRatings : Table(){ - val tconst : Column = varchar("tconst", 10).uniqueIndex() + val tconst : Column = varchar("tconst", 10)//.uniqueIndex() val averageRating : Column = float("averageRating").nullable() val numVotes : Column = integer("numVotes").nullable() + override val primaryKey = PrimaryKey(tconst, name = "tconst") + fun insertFromListString(values : List){ TitleRatings.insert { it[tconst] = values[0] @@ -21,12 +20,4 @@ object TitleRatings : Table(){ it[numVotes] = if (values[2] != NO_DATA) values[2].toInt() else null } } -} - -//class TitleRating(id: EntityID) : IntEntity(id) { -// companion object : IntEntityClass(TitleRatings) -// -// var tconst by TitleRatings.tconst -// var averageRating by TitleRatings.averageRating -// var numVotes by TitleRatings.numVotes -//} \ No newline at end of file +} \ No newline at end of file diff --git a/src/main/kotlin/loader/LoadImdb.kt b/src/main/kotlin/loader/LoadImdb.kt index a8d1c95..eb7f089 100644 --- a/src/main/kotlin/loader/LoadImdb.kt +++ b/src/main/kotlin/loader/LoadImdb.kt @@ -4,8 +4,8 @@ import org.jetbrains.exposed.sql.Database fun main(){ - var db = Database.connect("jdbc:h2:mem:test;DB_CLOSE_DELAY=-1", driver = "org.h2.Driver", user = "root", password = "") -// var db = Database.connect("jdbc:mysql://localhost:3308/imdb", driver = "com.mysql.jdbc.Driver", user = "root", password = "aRootPassword") +// var db = Database.connect("jdbc:h2:mem:test;DB_CLOSE_DELAY=-1", driver = "org.h2.Driver", user = "root", password = "") + var db = Database.connect("jdbc:mysql://localhost:3308/imdb?useSSL=false&allowPublicKeyRetrieval=true", driver = "com.mysql.jdbc.Driver", user = "root", password = "aRootPassword") println("Running loader") val titleRatings = TitleRatingsLoader(db) diff --git a/src/main/kotlin/loader/TitleRatingsLoader.kt b/src/main/kotlin/loader/TitleRatingsLoader.kt index f25e9e4..d4f7550 100644 --- a/src/main/kotlin/loader/TitleRatingsLoader.kt +++ b/src/main/kotlin/loader/TitleRatingsLoader.kt @@ -3,8 +3,10 @@ package loader import dsl.TitleRatings import org.jetbrains.exposed.sql.Database import org.jetbrains.exposed.sql.SchemaUtils +import org.jetbrains.exposed.sql.batchInsert import org.jetbrains.exposed.sql.select import org.jetbrains.exposed.sql.transactions.transaction +import tsv.Reader import java.io.File import java.util.concurrent.atomic.AtomicInteger @@ -15,7 +17,7 @@ class TitleRatingsLoader(private val db: Database) { transaction(db) { SchemaUtils.create (TitleRatings) } } - fun loadData(){ + fun loadDataMultiInsert(){ val nameBasicsReader = File("./datasets/title.ratings.tsv").bufferedReader() nameBasicsReader.readLine() @@ -33,6 +35,28 @@ class TitleRatingsLoader(private val db: Database) { println("Done loading title ratings!") } + fun loadData(){ + val nameBasicsReader = File("./datasets/title.ratings.tsv").bufferedReader() + nameBasicsReader.readLine() + + println("Title.Ratings loaded") + + val loader = AtomicInteger() + transaction { + val lines = nameBasicsReader.readLines() + TitleRatings.batchInsert(lines){ + val items = it.split("\t") + this[TitleRatings.tconst] = items[0] + this[TitleRatings.averageRating] = if (items[1] != Reader.NO_DATA) items[1].toFloat() else null + this[TitleRatings.numVotes] = if (items[2] != Reader.NO_DATA) items[2].toInt() else null + if (loader.incrementAndGet() % 10000 == 0) println(it) + } + + } + + println("Done loading title ratings!") + } + fun showSome(){ transaction(db) {