| Author |
Message |
|
|
What would be the best way?
Add an extra RFC 3066 column in the languages table?
Or use the iso_639_3 code in countryinfo instead?
How about adding the native name of each language, as Wikipedia does?
(http://en.wikipedia.org/wiki/List_of_ISO_639-2_codes)
thx,
Bastiaan
|
 |
|
|
nice, fixed it in my script too:
http://forum.geonames.org/gforum/posts/list/926.page#5449
cheers,
Bastiaan
|
 |
|
|
bwakkie wrote:
Hi,
I loved the script so I started working on it. This is the working end result.
done:
It will create a working path if it is not there
It will create the db
It will check whether you already have the latest files from the net and, if so, will not download them again
Added the command CASCADE to DROP TABLE geoname due to the constraints
I do not think you need to change DROP TABLE to DROP TABLE IF EXISTS, it is sort of pointless
todo:
perhaps only update/insert/delete changes in the future by removing duplicate lines from the txt files?
Code:
#!/bin/bash
#===============================================================================
#
# FILE: getgeo.sh
#
# USAGE: ./getgeo.sh
#
# DESCRIPTION: run the script so that the geodata will be downloaded and inserted into your
# database
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: ---
# AUTHOR: Andreas (aka Harpagophyt )
# COMPANY: <a href="http://forum.geonames.org/gforum/posts/list/926.page" target="_blank" rel="nofollow">http://forum.geonames.org/gforum/posts/list/926.page</a>
# VERSION: 1.3
# CREATED: 07/06/2008
# REVISION: 1.1 2008-06-07 replace COPY continentCodes through INSERT statements.
# 1.2 2008-11-25 Adjusted by Bastiaan Wakkie in order to not unnecessarily
# download.
#===============================================================================
#!/bin/bash
# --- configuration -----------------------------------------------------------
# Base working directory. $HOME is used instead of "~" because a tilde inside
# double quotes is not expanded by the shell; the quoted form made every later
# path refer to a literal directory named "~" under the current directory.
WORKPATH="$HOME/geodata"
# Sub-directory of WORKPATH that receives the geonames dump files.
TMPPATH="tmp"
# Sub-directory of WORKPATH that receives the postal code files.
PCPATH="pc"
# Marker used in the names of the local copies that record which download
# has already been processed.
PREFIX="_"
# Database role that owns the database and runs the import.
DBUSER="$USER"
# Files fetched from http://download.geonames.org/export/dump/
FILES="allCountries.zip alternateNames.zip userTags.zip admin1Codes.txt admin1CodesASCII.txt admin2Codes.txt countryInfo.txt featureCodes.txt iso-languagecodes.txt timeZones.txt"
# Create the target database, owned by DBUSER.
createdb -O "$DBUSER" geonames
# Re-create the schema. Everything up to the closing EOT line is sent to psql.
psql -U "$DBUSER" geonames <<EOT
-- Main table, loaded from allCountries.txt.
-- IF EXISTS keeps the first run (freshly created database, no tables yet)
-- free of errors; CASCADE also removes the foreign keys that reference it.
DROP TABLE IF EXISTS geoname CASCADE;
CREATE TABLE geoname (
    geonameid int,
    name varchar(200),
    asciiname varchar(200),
    alternatenames varchar(4000),
    latitude float,
    longitude float,
    fclass char(1),
    fcode varchar(10),
    country varchar(2),
    cc2 varchar(60),
    admin1 varchar(20),
    admin2 varchar(80),
    admin3 varchar(20),
    admin4 varchar(20),
    population bigint,
    elevation int,
    gtopo30 int,
    timezone varchar(40),
    moddate date
);
-- Alternate names per geoname row, loaded from alternateNames.txt.
DROP TABLE IF EXISTS alternatename;
CREATE TABLE alternatename (
    alternatenameId int,
    geonameid int,
    isoLanguage varchar(7),
    alternateName varchar(200),
    isPreferredName boolean,
    isShortName boolean,
    isColloquial boolean,
    isHistoric boolean
);
-- Country table, loaded from the cleaned countryInfo.txt.tmp file.
-- The column order follows the column list of the COPY statement that fills
-- this table. postal and postalRegex were missing here although that COPY
-- names them, which made the load fail.
DROP TABLE IF EXISTS countryinfo;
CREATE TABLE "countryinfo" (
    iso_alpha2 char(2),
    iso_alpha3 char(3),
    iso_numeric integer,
    fips_code character varying(3),
    country character varying(200),
    capital character varying(200),
    areainsqkm double precision,
    population integer,
    continent char(2),
    tld CHAR(10),
    currency_code char(3),
    currency_name CHAR(15),
    phone character varying(20),
    -- NOTE(review): lengths 60 and 250 are taken from the other countryinfo
    -- definition in this thread; confirm against the current data file.
    postal character varying(60),
    postalRegex character varying(250),
    languages character varying(200),
    geonameId int,
    neighbours character varying(50),
    equivalent_fips_code character varying(3)
);
-- Lookup tables. IF EXISTS avoids errors on a first run, when the database
-- has just been created and none of these tables exist yet.
DROP TABLE IF EXISTS iso_languagecodes;
CREATE TABLE iso_languagecodes(
    iso_639_3 CHAR(4),
    iso_639_2 VARCHAR(50),
    iso_639_1 VARCHAR(50),
    language_name VARCHAR(200)
);
DROP TABLE IF EXISTS admin1Codes;
CREATE TABLE admin1Codes (
    code CHAR(6),
    name TEXT
);
DROP TABLE IF EXISTS admin1CodesAscii;
CREATE TABLE admin1CodesAscii (
    code CHAR(6),
    name TEXT,
    nameAscii TEXT,
    geonameid int
);
DROP TABLE IF EXISTS featureCodes;
CREATE TABLE featureCodes (
    code CHAR(7),
    name VARCHAR(200),
    description TEXT
);
-- Offsets use two decimals: numeric(3,1) rounds a quarter-hour offset such
-- as 5.75 to 5.8. NOTE(review): assumes the data file carries such values.
DROP TABLE IF EXISTS timeZones;
CREATE TABLE timeZones (
    timeZoneId VARCHAR(200),
    GMT_offset numeric(4,2),
    DST_offset numeric(4,2)
);
-- Filled by INSERT statements later in the script, not from a file.
DROP TABLE IF EXISTS continentCodes;
CREATE TABLE continentCodes (
    code CHAR(2),
    name VARCHAR(20),
    geonameid INT
);
-- Postal codes, loaded from the separately downloaded postal code dump.
DROP TABLE IF EXISTS postalcodes;
CREATE TABLE postalcodes (
    countrycode char(2),
    postalcode varchar(10),
    placename varchar(180),
    admin1name varchar(100),
    admin1code varchar(20),
    admin2name varchar(100),
    admin2code varchar(20),
    admin3name varchar(100),
    latitude float,
    longitude float,
    accuracy smallint
);
-- Primary keys.
ALTER TABLE ONLY geoname ADD CONSTRAINT pk_geonameid PRIMARY KEY (geonameid);
ALTER TABLE ONLY alternatename ADD CONSTRAINT pk_alternatenameid PRIMARY KEY (alternatenameid);
ALTER TABLE ONLY countryinfo ADD CONSTRAINT pk_iso_alpha2 PRIMARY KEY (iso_alpha2);
-- Foreign keys: both tables point at geoname through their geonameid column.
ALTER TABLE ONLY countryinfo ADD CONSTRAINT fk_geonameid FOREIGN KEY (geonameid) REFERENCES geoname (geonameid);
ALTER TABLE ONLY alternatename ADD CONSTRAINT fk_geonameid FOREIGN KEY (geonameid) REFERENCES geoname (geonameid);
EOT
# Check whether the needed directories already exist and create any that are
# missing. mkdir -p also creates the two sub-directories when WORKPATH itself
# is already present, a case the plain mkdir calls never handled.
workpath_missing=no
[ -d "$WORKPATH" ] || workpath_missing=yes
# Stop here on failure: continuing would download into the wrong directory.
mkdir -p "$WORKPATH/$TMPPATH" "$WORKPATH/$PCPATH" || exit 1
if [ "$workpath_missing" = yes ]; then
    echo "created $WORKPATH"
fi
echo
echo ",---- STARTING (downloading, unpacking and preparing)"
# Work inside the dump directory; abort if it cannot be entered so that
# nothing is downloaded into an unrelated directory.
cd "$WORKPATH/$TMPPATH" || exit 1
for i in $FILES
do
    # -N only downloads when the remote file is newer than the local one.
    wget -N -q "http://download.geonames.org/export/dump/$i" # get newer files
    # Process the file when it is newer than the copy kept from the previous
    # run, or when no such copy exists yet.
    if [ "$i" -nt "$PREFIX$i" ] || [ ! -e "$PREFIX$i" ] ; then
        cp -p "$i" "$PREFIX$i"
        case "$i" in
            *.zip)
                # Only archives are unpacked; the plain .txt downloads used
                # to be passed to unzip as well, which just produced errors.
                unzip -u -q "$i"
                ;;
            iso-languagecodes.txt)
                # Strip the header row.
                tail -n +2 iso-languagecodes.txt > iso-languagecodes.txt.tmp;
                ;;
            countryInfo.txt)
                # Strip the lines starting with '#', then the first remaining line.
                grep -v '^#' countryInfo.txt | tail -n +2 > countryInfo.txt.tmp;
                ;;
            timeZones.txt)
                # Strip the header row.
                tail -n +2 timeZones.txt > timeZones.txt.tmp;
                ;;
        esac
        # Report the loop variable; the original printed the unset first
        # script argument instead of the file name.
        echo "| $i has been downloaded";
    else
        echo "| $i is already the latest version"
    fi
done
# Download the postal codes. The URL and archive name below are placeholders:
# you have to fill in the real ones yourself.
cd "$WORKPATH/$PCPATH" || exit 1
wget -q -N "http://.../....zip"
# Unpack when the download is newer than the copy kept from the previous run,
# or when no such copy exists yet. The existence test now looks at the kept
# copy, as the dump file loop does; it used to test the download itself.
if [ "$WORKPATH/$PCPATH/....zip" -nt "$WORKPATH/$PCPATH/allCountries$PREFIX.zip" ] || [ ! -e "$WORKPATH/$PCPATH/allCountries$PREFIX.zip" ]; then
    unzip -u -q "$WORKPATH/$PCPATH/....zip"
    cp -p "$WORKPATH/$PCPATH/....zip" "$WORKPATH/$PCPATH/allCountries$PREFIX.zip"
    echo "| ....zip has been downloaded"
else
    echo "| ....zip is already the latest version"
fi
echo "+---- FILL DATABASE ( this takes 2 days on my machine :)"
# Load all tables. -e makes psql echo every statement it runs. The shell
# expands WORKPATH, TMPPATH and PCPATH inside the heredoc before psql sees it.
psql -e -U "$DBUSER" geonames <<EOT
-- NOTE(review): COPY ... FROM with a file name is read by the database
-- server, so these paths must be readable on the server side -- confirm for
-- your setup. Empty fields are stored as NULL (null as '').
copy geoname (geonameid,name,asciiname,alternatenames,latitude,longitude,fclass,fcode,country,cc2,admin1,admin2,admin3,admin4,population,elevation,gtopo30,timezone,moddate) from '${WORKPATH}/${TMPPATH}/allCountries.txt' null as '';
copy postalcodes (countrycode,postalcode,placename,admin1name,admin1code,admin2name,admin2code,admin3name,latitude,longitude,accuracy) from '${WORKPATH}/${PCPATH}/allCountries.txt' null as '';
copy timeZones (timeZoneId,GMT_offset,DST_offset) from '${WORKPATH}/${TMPPATH}/timeZones.txt.tmp' null as '';
copy featureCodes (code,name,description) from '${WORKPATH}/${TMPPATH}/featureCodes.txt' null as '';
copy admin1CodesAscii (code,name,nameAscii,geonameid) from '${WORKPATH}/${TMPPATH}/admin1CodesASCII.txt' null as '';
copy admin1Codes (code,name) from '${WORKPATH}/${TMPPATH}/admin1Codes.txt' null as '';
copy iso_languagecodes (iso_639_3,iso_639_2,iso_639_1,language_name) from '${WORKPATH}/${TMPPATH}/iso-languagecodes.txt.tmp' null as '';
copy countryInfo (iso_alpha2,iso_alpha3,iso_numeric,fips_code,country,capital,areainsqkm,population,continent,tld,currency_code,currency_name,phone,postal,postalRegex,languages,geonameid,neighbours,equivalent_fips_code) from '${WORKPATH}/${TMPPATH}/countryInfo.txt.tmp' null as '';
copy alternatename (alternatenameid,geonameid,isoLanguage,alternateName,isPreferredName,isShortName,isColloquial,isHistoric) from '${WORKPATH}/${TMPPATH}/alternateNames.txt' null as '';
-- Continents are inserted directly instead of being copied from a file.
-- The explicit column lists keep the statements valid if columns are added.
INSERT INTO continentCodes (code, name, geonameid) VALUES ('AF', 'Africa', 6255146);
INSERT INTO continentCodes (code, name, geonameid) VALUES ('AS', 'Asia', 6255147);
INSERT INTO continentCodes (code, name, geonameid) VALUES ('EU', 'Europe', 6255148);
INSERT INTO continentCodes (code, name, geonameid) VALUES ('NA', 'North America', 6255149);
INSERT INTO continentCodes (code, name, geonameid) VALUES ('OC', 'Oceania', 6255150);
INSERT INTO continentCodes (code, name, geonameid) VALUES ('SA', 'South America', 6255151);
INSERT INTO continentCodes (code, name, geonameid) VALUES ('AN', 'Antarctica', 6255152);
-- Indexes for looking up rows by geonameid, created after the bulk load.
CREATE INDEX index_countryinfo_geonameid ON countryinfo USING hash (geonameid);
CREATE INDEX index_alternatename_geonameid ON alternatename USING hash (geonameid);
EOT
echo "'----- DONE ( have fun... )"
|
 |
|
|
First the countryInfo table has changed.
in PostgreSQL I do the following:
Code:
-- countryinfo layout for the changed countryInfo.txt file.
-- The column order is the order used by the matching COPY column list.
CREATE TABLE countryinfo (
    iso_alpha2          char(2),
    iso_alpha3          char(3),
    iso_numeric         int,
    fips_code           varchar(3),
    name                varchar(200),
    capital             varchar(200),
    areainsqkm          double precision,
    population          int,
    continent           char(2),
    tld                 varchar(200),
    currency            char(3),
    curranceName        char(10),
    phone               char(10),
    postal              char(60),
    postalRegex         char(250),
    languages           char(130),
    equivalentFipsCode  int,
    neighbours          char(50)
);
Code:
# Build countryInfo.txt.tmp: drop the lines starting with '#', remove a tab at
# the end of each line, then skip the first remaining line.
grep -v '^#' countryInfo.txt |sed 's/\t$//g'| tail -n +2 > countryInfo.txt.tmp;
...before populating the countryinfo table in psql ...
Code:
-- Load the cleaned file into countryinfo; empty fields become NULL.
copy countryinfo (iso_alpha2,iso_alpha3,iso_numeric,fips_code,name,capital,areaInSqKm,population,continent,tld,currency,curranceName,phone,postal,postalRegex,languages,equivalentFipsCode,neighbours) from '${WORKPATH}/${TMPPATH}/countryInfo.txt.tmp' null as '';
grep removes all comments
sed removes the tab at the end of each line
and tail removes the column names
See also my import script here (it will be updated shortly):
http://forum.geonames.org/gforum/posts/list/926.page
Cheers,
Bastiaan
|
 |
|
|
The countryInfo.txt has wrong table field names:
example:
Code:
#ISO ISO3 ISO-Numeric fips Country Capital Area(in sq km) Population Continent tld CurrencyCode CurrencyName Phone Postal Code Format Postal Code Regex Languages geonameid neighbours EquivalentFipsCode
AD AND 20 AN Andorra Andorra la Vella 468 72000 EU .ad EUR Euro 376 AD### ^(?:AD)*(\d{3})$ ca,fr-AD,pt 3041565 ES,FR
neighbours EquivalentFipsCode are switched!
also the geonameid does not exist anymore
so new postgresql sql will be something like this:
Code:
I adjusted the code above, as the length values of the phone and currency name columns were changed too. There was also a weird typo with one tab too many; this is handled now as well.
|
 |
|
|
Hi,
I loved the script so I started working on it. This is the working end result.
done:
It will create a working path if it is not there
It will create the db
It will check whether you already have the latest files from the net and, if so, will not download them again
Added the command CASCADE to DROP TABLE geoname due to the constraints
I do not think you need to change DROP TABLE to DROP TABLE IF EXISTS, it is sort of pointless
todo:
perhaps only update/insert/delete changes in the future by removing duplicate lines from the txt files?
Code:
#!/bin/bash
#===============================================================================
#
# FILE: getgeo.sh
#
# USAGE: ./getgeo.sh
#
# DESCRIPTION: run the script so that the geodata will be downloaded and inserted into your
# database
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: ---
# AUTHOR: Andreas (aka Harpagophyt )
# COMPANY: <a href="http://forum.geonames.org/gforum/posts/list/926.page" target="_blank" rel="nofollow">http://forum.geonames.org/gforum/posts/list/926.page</a>
# VERSION: 1.3
# CREATED: 07/06/2008
# REVISION: 1.1 2008-06-07 replace COPY continentCodes through INSERT statements.
# 1.2 2008-11-25 Adjusted by Bastiaan Wakkie in order to not unnecessarily
# download.
# 1.3 2009-02-18 Fixed countryInfo bug in Aland Islands as there was one tab too many at the end and some field lengths needed to be larger
#===============================================================================
#!/bin/bash
# --- configuration -----------------------------------------------------------
# Base working directory. $HOME is used instead of "~" because a tilde inside
# double quotes is not expanded by the shell; the quoted form made every later
# path refer to a literal directory named "~" under the current directory.
WORKPATH="$HOME/geodata"
# Sub-directory of WORKPATH that receives the geonames dump files.
TMPPATH="tmp"
# Sub-directory of WORKPATH that receives the postal code files.
PCPATH="pc"
# Marker used in the names of the local copies that record which download
# has already been processed.
PREFIX="_"
# Database role that owns the database and runs the import.
DBUSER="$USER"
# Files fetched from http://download.geonames.org/export/dump/
FILES="allCountries.zip alternateNames.zip userTags.zip admin1Codes.txt admin1CodesASCII.txt admin2Codes.txt countryInfo.txt featureCodes.txt iso-languagecodes.txt timeZones.txt"
# Create the target database, owned by DBUSER.
createdb -O "$DBUSER" geonames
# Re-create the schema. Everything up to the closing EOT line is sent to psql.
psql -U "$DBUSER" geonames <<EOT
-- Main table, loaded from allCountries.txt.
-- IF EXISTS keeps the first run (freshly created database, no tables yet)
-- free of errors; CASCADE also removes the foreign keys that reference it.
DROP TABLE IF EXISTS geoname CASCADE;
CREATE TABLE geoname (
    geonameid int,
    name varchar(200),
    asciiname varchar(200),
    alternatenames varchar(4000),
    latitude float,
    longitude float,
    fclass char(1),
    fcode varchar(10),
    country varchar(2),
    cc2 varchar(60),
    admin1 varchar(20),
    admin2 varchar(80),
    admin3 varchar(20),
    admin4 varchar(20),
    population bigint,
    elevation int,
    gtopo30 int,
    timezone varchar(40),
    moddate date
);
-- Alternate names per geoname row, loaded from alternateNames.txt.
-- The comma after isShortName was missing, which made this CREATE TABLE a
-- syntax error and left the table undefined.
-- isColloquial and isHistoric are not in the column list of the COPY that
-- fills this table, so that load leaves them NULL.
DROP TABLE IF EXISTS alternatename;
CREATE TABLE alternatename (
    alternatenameId int,
    geonameid int,
    isoLanguage varchar(7),
    alternateName varchar(200),
    isPreferredName boolean,
    isShortName boolean,
    isColloquial boolean,
    isHistoric boolean
);
-- Country table, loaded from the cleaned countryInfo.txt.tmp file.
-- IF EXISTS avoids an error on a first run, when the table does not exist.
-- NOTE(review): the fixed-width char(n) columns are blank-padded by
-- PostgreSQL; kept as they are so that existing queries see the same values.
DROP TABLE IF EXISTS countryinfo;
CREATE TABLE countryinfo (
    iso_alpha2 char(2),
    iso_alpha3 char(3),
    iso_numeric integer,
    fips_code character varying(3),
    name character varying(200),
    capital character varying(200),
    areainsqkm double precision,
    population integer,
    continent char(2),
    tld character varying(200),
    currency char(3),
    curranceName char(15),
    phone char(20),
    postal char(60),
    postalRegex char(250),
    languages char(130),
    geonameid integer,
    neighbours char(50)
);
-- Lookup tables. IF EXISTS avoids errors on a first run, when the database
-- has just been created and none of these tables exist yet.
DROP TABLE IF EXISTS iso_languagecodes;
CREATE TABLE iso_languagecodes(
    iso_639_3 CHAR(4),
    iso_639_2 VARCHAR(50),
    iso_639_1 VARCHAR(50),
    language_name VARCHAR(200)
);
DROP TABLE IF EXISTS admin1Codes;
CREATE TABLE admin1Codes (
    code CHAR(6),
    name TEXT
);
DROP TABLE IF EXISTS admin1CodesAscii;
CREATE TABLE admin1CodesAscii (
    code CHAR(6),
    name TEXT,
    nameAscii TEXT,
    geonameid int
);
DROP TABLE IF EXISTS featureCodes;
CREATE TABLE featureCodes (
    code CHAR(7),
    name VARCHAR(200),
    description TEXT
);
-- Offsets use two decimals: numeric(3,1) rounds a quarter-hour offset such
-- as 5.75 to 5.8. NOTE(review): assumes the data file carries such values.
DROP TABLE IF EXISTS timeZones;
CREATE TABLE timeZones (
    timeZoneId VARCHAR(200),
    GMT_offset numeric(4,2),
    DST_offset numeric(4,2)
);
-- Filled by INSERT statements later in the script, not from a file.
DROP TABLE IF EXISTS continentCodes;
CREATE TABLE continentCodes (
    code CHAR(2),
    name VARCHAR(20),
    geonameid INT
);
-- Postal codes, loaded from the separately downloaded postal code dump.
DROP TABLE IF EXISTS postalcodes;
CREATE TABLE postalcodes (
    countrycode char(2),
    postalcode varchar(10),
    placename varchar(180),
    admin1name varchar(100),
    admin1code varchar(20),
    admin2name varchar(100),
    admin2code varchar(20),
    admin3name varchar(100),
    latitude float,
    longitude float,
    accuracy smallint
);
-- Primary keys.
ALTER TABLE ONLY geoname ADD CONSTRAINT pk_geonameid PRIMARY KEY (geonameid);
ALTER TABLE ONLY alternatename ADD CONSTRAINT pk_alternatenameid PRIMARY KEY (alternatenameid);
ALTER TABLE ONLY countryinfo ADD CONSTRAINT pk_iso_alpha2 PRIMARY KEY (iso_alpha2);
-- Foreign keys: both tables point at geoname through their geonameid column.
ALTER TABLE ONLY countryinfo ADD CONSTRAINT fk_geonameid FOREIGN KEY (geonameid) REFERENCES geoname (geonameid);
ALTER TABLE ONLY alternatename ADD CONSTRAINT fk_geonameid FOREIGN KEY (geonameid) REFERENCES geoname (geonameid);
EOT
# Check whether the needed directories already exist and create any that are
# missing. mkdir -p also creates the two sub-directories when WORKPATH itself
# is already present, a case the plain mkdir calls never handled.
workpath_missing=no
[ -d "$WORKPATH" ] || workpath_missing=yes
# Stop here on failure: continuing would download into the wrong directory.
mkdir -p "$WORKPATH/$TMPPATH" "$WORKPATH/$PCPATH" || exit 1
if [ "$workpath_missing" = yes ]; then
    echo "created $WORKPATH"
fi
echo
echo ",---- STARTING (downloading, unpacking and preparing)"
# Work inside the dump directory; abort if it cannot be entered so that
# nothing is downloaded into an unrelated directory.
cd "$WORKPATH/$TMPPATH" || exit 1
for i in $FILES
do
    # -N only downloads when the remote file is newer than the local one.
    wget -N -q "http://download.geonames.org/export/dump/$i" # get newer files
    # Process the file when it is newer than the copy kept from the previous
    # run, or when no such copy exists yet.
    if [ "$i" -nt "$PREFIX$i" ] || [ ! -e "$PREFIX$i" ] ; then
        cp -p "$i" "$PREFIX$i"
        case "$i" in
            *.zip)
                # Only archives are unpacked; the plain .txt downloads used
                # to be passed to unzip as well, which just produced errors.
                unzip -u -q "$i"
                ;;
            iso-languagecodes.txt)
                # Strip the header row.
                tail -n +2 iso-languagecodes.txt > iso-languagecodes.txt.tmp;
                ;;
            countryInfo.txt)
                # Strip the lines starting with '#', then the first remaining line.
                # NOTE(review): if the column header itself starts with '#',
                # tail -n +2 removes the first data row instead -- confirm
                # against the current countryInfo.txt.
                # NOTE(review): as written, the sed expression replaces a
                # string with an identical string and therefore does nothing.
                # Presumably it was meant to collapse a doubled tab in front
                # of FI (see revision 1.3) and the tab characters were lost
                # when the script was pasted -- TODO confirm and restore.
                grep -v '^#' countryInfo.txt | tail -n +2 | sed 's/ FI/ FI/g' > countryInfo.txt.tmp;
                ;;
            timeZones.txt)
                # Strip the header row.
                tail -n +2 timeZones.txt > timeZones.txt.tmp;
                ;;
        esac
        # Report the loop variable; the original printed the unset first
        # script argument instead of the file name.
        echo "| $i has been downloaded";
    else
        echo "| $i is already the latest version"
    fi
done
# Download the postal codes. The URL and archive name below are placeholders:
# you have to fill in the real ones yourself.
cd "$WORKPATH/$PCPATH" || exit 1
wget -q -N "http://.../....zip"
# Unpack when the download is newer than the copy kept from the previous run,
# or when no such copy exists yet. The existence test now looks at the kept
# copy, as the dump file loop does; it used to test the download itself.
if [ "$WORKPATH/$PCPATH/....zip" -nt "$WORKPATH/$PCPATH/allCountries$PREFIX.zip" ] || [ ! -e "$WORKPATH/$PCPATH/allCountries$PREFIX.zip" ]; then
    unzip -u -q "$WORKPATH/$PCPATH/....zip"
    cp -p "$WORKPATH/$PCPATH/....zip" "$WORKPATH/$PCPATH/allCountries$PREFIX.zip"
    echo "| ....zip has been downloaded"
else
    echo "| ....zip is already the latest version"
fi
echo "+---- FILL DATABASE ( this takes 2 days on my machine :)"
# Load all tables. -e makes psql echo every statement it runs. The shell
# expands WORKPATH, TMPPATH and PCPATH inside the heredoc before psql sees it.
psql -e -U "$DBUSER" geonames <<EOT
-- NOTE(review): COPY ... FROM with a file name is read by the database
-- server, so these paths must be readable on the server side -- confirm for
-- your setup. Empty fields are stored as NULL (null as '').
copy geoname (geonameid,name,asciiname,alternatenames,latitude,longitude,fclass,fcode,country,cc2,admin1,admin2,admin3,admin4,population,elevation,gtopo30,timezone,moddate) from '${WORKPATH}/${TMPPATH}/allCountries.txt' null as '';
copy postalcodes (countrycode,postalcode,placename,admin1name,admin1code,admin2name,admin2code,admin3name,latitude,longitude,accuracy) from '${WORKPATH}/${PCPATH}/allCountries.txt' null as '';
copy timeZones (timeZoneId,GMT_offset,DST_offset) from '${WORKPATH}/${TMPPATH}/timeZones.txt.tmp' null as '';
copy featureCodes (code,name,description) from '${WORKPATH}/${TMPPATH}/featureCodes.txt' null as '';
copy admin1CodesAscii (code,name,nameAscii,geonameid) from '${WORKPATH}/${TMPPATH}/admin1CodesASCII.txt' null as '';
copy admin1Codes (code,name) from '${WORKPATH}/${TMPPATH}/admin1Codes.txt' null as '';
copy iso_languagecodes (iso_639_3,iso_639_2,iso_639_1,language_name) from '${WORKPATH}/${TMPPATH}/iso-languagecodes.txt.tmp' null as '';
-- The column list uses the names the countryinfo table is created with in
-- this script (name, currency, curranceName); the previous list named
-- country, currency_code and currency_name, which do not exist there.
copy countryInfo (iso_alpha2,iso_alpha3,iso_numeric,fips_code,name,capital,areainsqkm,population,continent,tld,currency,curranceName,phone,postal,postalRegex,languages,geonameid,neighbours) from '${WORKPATH}/${TMPPATH}/countryInfo.txt.tmp' null as '';
copy alternatename (alternatenameid,geonameid,isoLanguage,alternateName,isPreferredName,isShortName) from '${WORKPATH}/${TMPPATH}/alternateNames.txt' null as '';
-- Continents are inserted directly instead of being copied from a file.
-- The explicit column lists keep the statements valid if columns are added.
INSERT INTO continentCodes (code, name, geonameid) VALUES ('AF', 'Africa', 6255146);
INSERT INTO continentCodes (code, name, geonameid) VALUES ('AS', 'Asia', 6255147);
INSERT INTO continentCodes (code, name, geonameid) VALUES ('EU', 'Europe', 6255148);
INSERT INTO continentCodes (code, name, geonameid) VALUES ('NA', 'North America', 6255149);
INSERT INTO continentCodes (code, name, geonameid) VALUES ('OC', 'Oceania', 6255150);
INSERT INTO continentCodes (code, name, geonameid) VALUES ('SA', 'South America', 6255151);
INSERT INTO continentCodes (code, name, geonameid) VALUES ('AN', 'Antarctica', 6255152);
-- Indexes for looking up rows by geonameid, created after the bulk load.
CREATE INDEX index_countryinfo_geonameid ON countryinfo USING hash (geonameid);
CREATE INDEX index_alternatename_geonameid ON alternatename USING hash (geonameid);
EOT
echo "'----- DONE ( have fun... )"
|
 |
|
|
|
http://forum.geonames.org/gforum/posts/list/926.page does more automatically
|
 |
|
|
|
|