Skip to content

Commit

Permalink
WIP: Gazetteer support for GeoNames and WOF
Browse files Browse the repository at this point in the history
  • Loading branch information
ewlarson committed Nov 25, 2024
1 parent f2c10b3 commit b7a8e58
Show file tree
Hide file tree
Showing 12 changed files with 615 additions and 22 deletions.
3 changes: 3 additions & 0 deletions app/models/gazetteer/geonames/name.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# ActiveRecord model for GeoNames gazetteer entries.
#
# The table name is set explicitly to `gazetteer_geonames_names` rather than
# left to be derived from the namespaced class name.
class Gazetteer::Geonames::Name < ApplicationRecord
  self.table_name = "gazetteer_geonames_names"
end
3 changes: 3 additions & 0 deletions app/models/gazetteer/wof/ancestor.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# ActiveRecord model for WOF gazetteer ancestor rows.
#
# The table name is set explicitly to `gazetteer_wof_ancestors` rather than
# left to be derived from the namespaced class name.
class Gazetteer::Wof::Ancestor < ApplicationRecord
  self.table_name = "gazetteer_wof_ancestors"
end
3 changes: 3 additions & 0 deletions app/models/gazetteer/wof/concordance.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# ActiveRecord model for WOF gazetteer concordance rows.
#
# The table name is set explicitly to `gazetteer_wof_concordances` rather than
# left to be derived from the namespaced class name.
class Gazetteer::Wof::Concordance < ApplicationRecord
  self.table_name = "gazetteer_wof_concordances"
end
3 changes: 3 additions & 0 deletions app/models/gazetteer/wof/geojson.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# ActiveRecord model for WOF gazetteer GeoJSON rows.
#
# The table name is set explicitly to `gazetteer_wof_geojson` (singular)
# rather than left to be derived from the namespaced class name.
class Gazetteer::Wof::Geojson < ApplicationRecord
  self.table_name = "gazetteer_wof_geojson"
end
3 changes: 3 additions & 0 deletions app/models/gazetteer/wof/name.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# ActiveRecord model for WOF gazetteer name rows.
#
# The table name is set explicitly to `gazetteer_wof_names` rather than
# left to be derived from the namespaced class name.
class Gazetteer::Wof::Name < ApplicationRecord
  self.table_name = "gazetteer_wof_names"
end
3 changes: 3 additions & 0 deletions app/models/gazetteer/wof/spr.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# ActiveRecord model for WOF gazetteer `spr` rows.
#
# The table name is set explicitly to `gazetteer_wof_spr` (singular) rather
# than left to be derived from the namespaced class name.
class Gazetteer::Wof::Spr < ApplicationRecord
  self.table_name = "gazetteer_wof_spr"
end
13 changes: 0 additions & 13 deletions app/models/geoname.rb

This file was deleted.

1 change: 1 addition & 0 deletions db/migrate/20241110202927_create_geonames.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def change
t.timestamps
end

# @TODO: Add indexes after importing the data.
# Indexes
# add_index :geonames, :geonameid, unique: true
# add_index :geonames, :name
Expand Down
72 changes: 72 additions & 0 deletions db/migrate/20241124223351_create_gazetteer_wok_tables.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# Moves the existing GeoNames table into the `gazetteer_` namespace and creates
# the WOF gazetteer tables (ancestors, concordances, geojson, names, spr).
#
# Every operation used here (rename_table, rename_column, create_table) is
# reversible, so a single `change` method is sufficient.
#
# NOTE(review): `wok_id` (and the "Wok" in this class name) looks like a typo
# for "wof". It is left untouched because the class name must match the
# migration filename and the column name is part of the schema other code may
# rely on — confirm and rename in a follow-up migration if intended.
class CreateGazetteerWokTables < ActiveRecord::Migration[7.0]
  def change
    # GeoNames: namespace the table and use a conventional snake_case id column.
    rename_table :geonames, :gazetteer_geonames_names
    rename_column :gazetteer_geonames_names, :geonameid, :geoname_id

    create_table :gazetteer_wof_ancestors do |t|
      t.bigint :wok_id
      # bigint to match wok_id and the raw `ancestors` table in db/schema.rb,
      # which stores ancestor_id as bigint.
      t.bigint :ancestor_id
      t.string :ancestor_placetype
      t.integer :lastmodified
      t.timestamps
    end

    create_table :gazetteer_wof_concordances do |t|
      t.bigint :wok_id
      t.string :other_id
      t.string :other_source
      t.integer :lastmodified
      t.timestamps
    end

    create_table :gazetteer_wof_geojson do |t|
      t.bigint :wok_id
      t.text :body
      t.string :source
      t.string :alt_label
      t.boolean :is_alt
      t.integer :lastmodified
      t.timestamps
    end

    create_table :gazetteer_wof_names do |t|
      t.bigint :wok_id
      t.string :placetype
      t.string :country
      t.string :language
      t.string :extlang
      t.string :script
      t.string :region
      t.string :variant
      t.string :extension
      t.string :privateuse
      t.string :name
      t.integer :lastmodified
      t.timestamps
    end

    create_table :gazetteer_wof_spr do |t|
      t.bigint :wok_id
      # bigint to match wok_id and the raw `spr` table in db/schema.rb,
      # which stores parent_id as bigint.
      t.bigint :parent_id
      t.string :name
      t.string :placetype
      t.string :country
      t.string :repo
      t.decimal :latitude
      t.decimal :longitude
      t.decimal :min_latitude
      t.decimal :min_longitude
      t.decimal :max_latitude
      t.decimal :max_longitude
      t.integer :is_current
      t.integer :is_deprecated
      t.integer :is_ceased
      t.integer :is_superseded
      t.integer :is_superseding
      # The raw `spr` table in db/schema.rb stores these two columns as text,
      # so an integer column could not hold the same data.
      # NOTE(review): presumably these hold lists of ids — confirm against the
      # source data.
      t.text :superseded_by
      t.text :supersedes
      t.integer :lastmodified
      t.timestamps
    end
  end
end
145 changes: 142 additions & 3 deletions db/schema.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.

ActiveRecord::Schema[7.0].define(version: 2024_11_10_202927) do
ActiveRecord::Schema[7.0].define(version: 2024_11_24_223351) do
# These are extensions that must be enabled in order to support this database
enable_extension "pgcrypto"
enable_extension "plpgsql"
Expand Down Expand Up @@ -84,6 +84,16 @@
t.index ["blob_id", "variation_digest"], name: "index_active_storage_variant_records_uniqueness", unique: true
end

create_table "ancestors", id: false, force: :cascade do |t|
t.bigint "id"
t.bigint "ancestor_id"
t.text "ancestor_placetype"
t.bigint "lastmodified"
t.index ["ancestor_id", "ancestor_placetype", "lastmodified"], name: "idx_17618719_ancestors_by_ancestor"
t.index ["id", "ancestor_placetype", "lastmodified"], name: "idx_17618719_ancestors_by_id"
t.index ["lastmodified"], name: "idx_17618719_ancestors_by_lastmod"
end

create_table "blacklight_allmaps_sidecars", force: :cascade do |t|
t.string "solr_document_id"
t.string "document_type", default: "SolrDocument"
Expand Down Expand Up @@ -221,6 +231,17 @@
t.datetime "updated_at", null: false
end

create_table "concordances", id: false, force: :cascade do |t|
t.bigint "id"
t.text "other_id"
t.text "other_source"
t.bigint "lastmodified"
t.index ["id", "lastmodified"], name: "idx_17618724_concordances_by_id"
t.index ["lastmodified"], name: "idx_17618724_concordances_by_lastmod"
t.index ["other_source", "other_id", "lastmodified"], name: "idx_17618724_concordances_by_other_lastmod"
t.index ["other_source", "other_id"], name: "idx_17618724_concordances_by_other_id"
end

create_table "document_accesses", force: :cascade do |t|
t.string "friendlier_id", null: false
t.string "institution_code", null: false
Expand Down Expand Up @@ -311,8 +332,8 @@
t.datetime "updated_at", null: false
end

create_table "geonames", force: :cascade do |t|
t.bigint "geonameid"
create_table "gazetteer_geonames_names", force: :cascade do |t|
t.bigint "geoname_id"
t.string "name"
t.string "asciiname"
t.text "alternatenames"
Expand All @@ -335,6 +356,86 @@
t.datetime "updated_at", null: false
end

create_table "gazetteer_wof_ancestors", force: :cascade do |t|
t.bigint "wok_id"
t.integer "ancestor_id"
t.string "ancestor_placetype"
t.integer "lastmodified"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
end

create_table "gazetteer_wof_concordances", force: :cascade do |t|
t.bigint "wok_id"
t.string "other_id"
t.string "other_source"
t.integer "lastmodified"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
end

create_table "gazetteer_wof_geojson", force: :cascade do |t|
t.bigint "wok_id"
t.text "body"
t.string "source"
t.string "alt_label"
t.boolean "is_alt"
t.integer "lastmodified"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
end

create_table "gazetteer_wof_names", force: :cascade do |t|
t.bigint "wok_id"
t.string "placetype"
t.string "country"
t.string "language"
t.string "extlang"
t.string "script"
t.string "region"
t.string "variant"
t.string "extension"
t.string "privateuse"
t.string "name"
t.integer "lastmodified"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
end

create_table "gazetteer_wof_spr", force: :cascade do |t|
t.bigint "wok_id"
t.integer "parent_id"
t.string "name"
t.string "placetype"
t.string "country"
t.string "repo"
t.decimal "latitude"
t.decimal "longitude"
t.decimal "min_latitude"
t.decimal "min_longitude"
t.decimal "max_latitude"
t.decimal "max_longitude"
t.integer "is_current"
t.integer "is_deprecated"
t.integer "is_ceased"
t.integer "is_superseded"
t.integer "is_superseding"
t.integer "superseded_by"
t.integer "supersedes"
t.integer "lastmodified"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
end

create_table "geojson", id: false, force: :cascade do |t|
t.bigint "id"
t.text "body"
t.text "source"
t.text "alt_label"
t.boolean "is_alt"
t.bigint "lastmodified"
end

create_table "image_upload_transitions", force: :cascade do |t|
t.string "to_state", null: false
t.text "metadata"
Expand Down Expand Up @@ -448,6 +549,21 @@
t.index ["import_id"], name: "index_mappings_on_import_id"
end

create_table "names", id: false, force: :cascade do |t|
t.bigint "id"
t.text "placetype"
t.text "country"
t.text "language"
t.text "extlang"
t.text "script"
t.text "region"
t.text "variant"
t.text "extension"
t.text "privateuse"
t.text "name"
t.bigint "lastmodified"
end

create_table "notifications", force: :cascade do |t|
t.string "recipient_type", null: false
t.bigint "recipient_id", null: false
Expand Down Expand Up @@ -522,6 +638,29 @@
t.index ["document_type", "document_id"], name: "solr_document_uris_solr_document"
end

create_table "spr", id: false, force: :cascade do |t|
t.bigint "id"
t.bigint "parent_id"
t.text "name"
t.text "placetype"
t.text "country"
t.text "repo"
t.float "latitude"
t.float "longitude"
t.float "min_latitude"
t.float "min_longitude"
t.float "max_latitude"
t.float "max_longitude"
t.bigint "is_current"
t.bigint "is_deprecated"
t.bigint "is_ceased"
t.bigint "is_superseded"
t.bigint "is_superseding"
t.text "superseded_by"
t.text "supersedes"
t.bigint "lastmodified"
end

create_table "uri_transitions", force: :cascade do |t|
t.string "to_state", null: false
t.text "metadata"
Expand Down
8 changes: 4 additions & 4 deletions lib/tasks/geoportal/gazetteer/geonames.rake
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ namespace :geoportal do
text = f.readline
row = CSV.parse_line(text, col_sep: "\t", headers: false)
geonames << {
geonameid: row[0],
geoname_id: row[0],
name: row[1],
asciiname: row[2],
alternatenames: row[3],
Expand All @@ -97,7 +97,7 @@ namespace :geoportal do

# Import every 100000 records
if geonames.size >= 100000
Geoname.import(geonames, validate: false)
Gazetteer::Geonames::Name.import(geonames, validate: false)
geonames.clear
end

Expand All @@ -110,7 +110,7 @@ namespace :geoportal do
end

# Import any remaining records
Geoname.import(geonames, validate: false) unless geonames.empty?
Gazetteer::Geonames::Name.import(geonames, validate: false) unless geonames.empty?

puts "Geonames import completed successfully."
end
Expand All @@ -125,7 +125,7 @@ namespace :geoportal do
connection.execute <<-SQL
COPY (
SELECT
geonameid AS geonameid_i,
geoname_id AS geonameid_i,
name,
asciiname AS asciiname_s,
alternatenames AS alternatenames_s,
Expand Down
Loading

0 comments on commit b7a8e58

Please sign in to comment.