Skip to content

Commit

Permalink
Continued to refactor the OpenStreetMap parser - still debugging, do …
Browse files Browse the repository at this point in the history
…not use this version
  • Loading branch information
nigelhorne committed Jan 25, 2024
1 parent ee6f5f5 commit d7e0960
Showing 1 changed file with 40 additions and 6 deletions.
46 changes: 40 additions & 6 deletions createdatabase.PL
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,8 @@ use constant DEBUG_OSM => 0x1000; # Test - and only import, the OSM_HOME data
use constant DEBUG_ALL => 0x1000;
use constant DEBUG => DEBUG_OSM;

use constant MAX_INSERT_COUNT => 250; # Maximum number of CSV rows to insert in a single statement
# use constant MAX_INSERT_COUNT => 1; # Maximum number of CSV rows to insert in a single statement
# use constant MAX_INSERT_COUNT => 250; # Maximum number of CSV rows to insert in a single statement
use constant MAX_INSERT_COUNT => 1; # Maximum number of CSV rows to insert in a single statement
# use constant SQLITE_CHUNK_SIZE => 1_000; # Number of rows to read at a time

binmode(STDOUT, "encoding(UTF-8)");
Expand Down Expand Up @@ -2074,6 +2074,9 @@ if(my $dr5hn = $ENV{'DR5HN_HOME'}) {

if(my $osm = $ENV{'OSM_HOME'}) {
# Openstreetmap
# There is a range of differing formats -
# the format doesn't seem to be normalized and fields are inconsistent,
# so try hard to extract the data

my @files = ('north-america-latest.osm.bz2', 'europe-latest.osm.bz2', 'australia-oceania-latest.osm.bz2');

Expand Down Expand Up @@ -2257,7 +2260,7 @@ if($place) {
'LAT' => $lat,
'LON' => $lon,
};
} elsif(defined($name) && ($name !~ /,/) && ($is_in =~ /[,;]\sCanada$/) && ($is_in =~ /[;,]/)) {
} elsif(defined($name) && ($name !~ /,/) && ($is_in =~ /[,;]\sCanada/) && ($is_in =~ /[;,]/)) {
if(scalar(@is_in_fields) == 3) {
my $code;
if($is_in_fields[1] =~ /Qu.bec/i) {
Expand Down Expand Up @@ -2349,9 +2352,10 @@ print __LINE__, "\n";
'LON' => $lon,
};
$row->{'NAME'} = $name if(defined($name));
} elsif($ca->{'province2code'}{uc($is_in_fields[0])} eq $is_in_fields[2]) {
} elsif($ca->{'province2code'}{uc($is_in_fields[0])} && ($ca->{'province2code'}{uc($is_in_fields[0])} eq $is_in_fields[2])) {
print __LINE__, "\n";
$state = $is_in_fields[2];
die if(!$ca->{'code2province'}{$state});
# name/place/is_in = Queens/undef/New Brunswick,N.B.,NB,Canada
$row = {
STATE => $state,
Expand All @@ -2376,6 +2380,16 @@ print __LINE__, "\n";
'LON' => $lon,
};
$row->{'NAME'} = $name if(defined($name));
} elsif(($state = $ca->{'province2code'}{uc($is_in_fields[1])}) && ($is_in_fields[2] eq 'Canada')) {
# name/place/is_in = Elmwood/undef/Winnipeg; Manitoba; Canada; CA,
$row = {
'CITY' => $is_in_fields[0],
'STATE' => $state,
'COUNTRY' => 'Canada',
'LAT' => $lat,
'LON' => $lon,
};
$row->{'NAME'} = $name if(defined($name));
}
} elsif(scalar(@is_in_fields) == 2) {
print __LINE__, "\n";
Expand Down Expand Up @@ -2434,6 +2448,16 @@ print __LINE__, "\n";
}
$row->{'NAME'} = $name if(defined($name));
die Data::Dumper->new([$row])->Dump();
} elsif((scalar(@is_in_fields) == 8) && ($is_in_fields[6] eq 'Canada')) {
# name/place/is_in = Pembina Strip/undef/Fort Garry South; Winnipeg; River Heights—Fort Garry; Fort Garry; Winnipeg; Manitoba; Canada; CA
$state = $ca->{'state2code'}{uc($is_in_fields[5])};
$row = {
CITY => $name,
STATE => $state,
COUNTRY => 'Canada',
LAT => $lat,
LON => $lon,
};
} elsif(scalar(@is_in_fields) >= 5) {
if(($is_in_fields[0] eq $is_in_fields[4]) && $ca->{'province2code'}{uc($is_in_fields[4])}) {
# name/place/is_in = Weston/undef/Manitoba, St. James–Brooklands; Assiniboia; Winnipeg; Manitoba; Canada
Expand All @@ -2451,7 +2475,17 @@ print __LINE__, "\n";
COUNTRY => 'Canada',
LAT => $lat,
LON => $lon,
};
}
} elsif(($state = $ca->{'province2code'}{uc($is_in_fields[2])}) && ($is_in_fields[3] eq 'Canada')) {
# name/place/is_in = East Kildonan/undef/East Kildonan–Transcona; Winnipeg; Manitoba; Canada; CA
$row = {
NAME => $name || $is_in_fields[0],
CITY => $is_in_fields[1],
STATE => $state,
COUNTRY => 'Canada',
LAT => $lat,
LON => $lon,
}
}
}
die scalar(@is_in_fields) if(!defined($row));
Expand Down Expand Up @@ -3034,7 +3068,7 @@ sub create_tree_from_git {
# Import a location into the database
# Parses the data and prepares a set of columns to be queued for insertion
# global stores in the global_md5 as well, useful when going through something state by state
# If NUMBER is given, also add without that field
# If NUMBER and/or NAME is given, also add without that field
sub import
{
my %param;
Expand Down

0 comments on commit d7e0960

Please sign in to comment.