-
Notifications
You must be signed in to change notification settings - Fork 0
/
madridxml2osm.sh
133 lines (104 loc) · 6.12 KB
/
madridxml2osm.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#!/bin/bash
# Converts a datos.madrid.es XML dataset into OSM format.
# The XML must be of the kind that has <atributo nombre="ID-ENTIDAD"> tags.
#
# Usage:   madridxml2osm.sh archivo.xml LLAVE=VALOR
# Example: madridxml2osm.sh 201132-0-turismo.xml tourism=museum
#
# Dependencies:
#   apt-get install xsltproc

# Exactly two arguments are required: the input file and the KEY=VALUE tag
# that is attached to every generated node.
if [ "$#" -ne 2 ]; then
  echo "Uso: $0 archivo.xml [LLAVE]=[VALOR]" >&2
  echo "Ejemplo: $0 201132-0-turismo.xml tourism=museum" >&2
  exit 1
fi

# Fail early when xsltproc cannot be run; diagnostics go to stderr.
if ! xsltproc -V >/dev/null 2>&1; then
  {
    echo "Error: Hace falta instalar xsltproc."
    echo ""
    echo "sudo apt-get install xsltproc -y"
  } >&2
  exit 1
fi

# Split the second argument at the FIRST "=" only, so a value that itself
# contains "=" is kept whole. Without any "=" the value is left empty.
case "$2" in
  *=*)
    LLAVE=${2%%=*}
    VALOR=${2#*=}
    ;;
  *)
    LLAVE=$2
    VALOR=
    ;;
esac

XML=$1
if [ ! -r "$XML" ]; then
  echo "Error: no se puede leer el archivo '$XML'." >&2
  exit 1
fi

# The dataset title is the text between the first ">" and the following "<"
# on the line right after the (last) "<infoDataset" match. Spaces become
# underscores because the title is used as the base name of every file the
# script writes; "/" is replaced too so the title cannot turn into a path.
OUT_FILE=$(
  grep -A1 "<infoDataset" -- "$XML" \
    | tail -n1 \
    | awk -F '>' '{print $2}' \
    | awk -F '<' '{print $1}' \
    | tr ' /' '__'
)

# An empty title would make every later file name start with "-" or ".".
if [ -z "$OUT_FILE" ]; then
  echo "Error: no se pudo obtener el nombre del conjunto de datos de '$XML'." >&2
  exit 1
fi

echo "Procesando conjunto de datos: ${OUT_FILE//_/ }"
# --- XML preprocessing -------------------------------------------------------

#######################################
# Flatten the source XML into one element per line.
# Arguments: $1 - path of the source XML file
# Outputs:   the preprocessed text on stdout
#######################################
preprocesar_xml() {
  # Stage order matters:
  #   1. strip the CDATA wrappers and turn <atributo nombre="X"> into <X>
  #      (the closing tag stays </atributo>),
  #   2. drop blank lines and the first nine remaining lines,
  #   3. join every line that does not end in ">" with the following one,
  #      using XXXXX as a temporary end-of-line marker,
  #   4. drop the container lines that are not wanted in the output
  #      (any line mentioning "atributos" is removed here, which already
  #      covers the "atributos idioma" lines),
  #   5. replace every "&" with the --FIXME-- placeholder.
  perl -pe 's/<!\[CDATA\[//g' -- "$1" \
    | perl -pe 's/]]>//g' \
    | perl -pe 's/atributo nombre=//g' \
    | tr -d '"' \
    | sed '/^\s*$/d' \
    | sed '1,9d' \
    | perl -pe 's/>\n/XXXXX/g' \
    | perl -pe 's/\n//g' \
    | perl -pe 's/XXXXX/>\n/g' \
    | grep -vi '<tipo>' \
    | grep -v atributos \
    | grep -v DATOSCONTACTOS \
    | grep -v LOCALIZACION \
    | perl -pe 's/&/--FIXME--/g'
}

#######################################
# Turn one preprocessed line into a properly closed element.
# Whitespace is collapsed first (leading/trailing removed, runs reduced to a
# single space) without any glob expansion. Lines that do not start with
# "<" followed by an upper-case letter are dropped. For the remaining lines
# the generic closing tag </atributo> is renamed after the opening tag;
# the word "atributo" inside the element content is left alone.
# Arguments: $1 - one line of preprocessed XML
# Outputs:   the fixed line on stdout, or nothing when the line is dropped
#######################################
limpiar_linea() {
  local IFS=$' \t\n'
  local -a palabras
  local linea clave cierre
  local -r inicio_elemento='^<[[:upper:]]'
  local -r cierre_generico='</atributo>'

  read -r -a palabras <<< "$1"
  linea="${palabras[*]}"

  [[ $linea =~ $inicio_elemento ]] || return 0

  # Tag name: the text after the leading "<" up to the next "<" or ">".
  clave=${linea#<}
  clave=${clave%%<*}
  clave=${clave%%>*}
  cierre="</${clave}>"

  # Quoted pattern and replacement are taken literally by bash.
  linea=${linea//"$cierre_generico"/"$cierre"}
  printf '%s\n' "$linea"
}

# Build the cleaned catalogue in a single pipeline, without scratch files:
# wrap the entries in <catalogo>, open a new <contenido> in front of every
# <ID-ENTIDAD> (whose value gets a "-" prefix), and delete the very first
# </contenido>, which has no matching opening tag.
preprocesar_xml "$XML" \
  | {
    echo "<catalogo>"
    while IFS='' read -r line || [[ -n "$line" ]]; do
      limpiar_linea "$line"
    done
  } \
  | perl -pe 's/<ID-ENTIDAD>/<\/contenido><contenido><ID-ENTIDAD>-/g' \
  | sed '0,/<\/contenido>/s///' \
  > "$OUT_FILE-clean.xml"
echo "</contenido> </catalogo>" >> "$OUT_FILE-clean.xml"
# Write the XSLT 1.0 stylesheet used for the conversion to $OUT_FILE.xsl.
# For every catalogo/contenido entry the template emits one node whose id,
# lat and lon come from ID-ENTIDAD, LATITUD and LONGITUD, followed by one tag
# per mapped source element (name, description, opening_hours, phone, fax,
# addr:*, wheelchair, url, email) and a fixed source tag.
#  - $LLAVE and $VALOR are spliced in by closing and reopening the
#    single-quoted string; they are expanded unquoted.
#  - phone and fax values are always prefixed with "+34 ".
#  - addr:street is CLASE-VIAL and NOMBRE-VIA joined by the literal " FIXME ".
#  - The <osm> element is opened in the template but not closed there; the
#    closing tag is appended to the output file after xsltproc has run.
# NOTE(review): as written here the template places <xsl:value-of> elements
# inside attribute values and contains a second XML declaration, which is not
# well-formed XML. Presumably the outer markup was meant to be written with
# character entities and this copy lost them -- confirm against upstream.
echo '<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:template match="/">
<?xml version="1.0" encoding="UTF-8"?>
<osm version="0.6" generator="madridxml2osm.sh">
<xsl:for-each select="catalogo/contenido">
<node id="<xsl:value-of select="ID-ENTIDAD"/>" lat="<xsl:value-of select="LATITUD"/>" lon="<xsl:value-of select="LONGITUD"/>" >
<tag k="'$LLAVE'" v="'$VALOR'"/>
<tag k="name" v="<xsl:value-of select="NOMBRE"/>"/>
<tag k="description" v="<xsl:value-of select="DESCRIPCION"/> <xsl:value-of select="DESCRIPCION-ENTIDAD"/> <xsl:value-of select="EQUIPAMIENTO"/>"/>
<tag k="opening_hours" v="<xsl:value-of select="HORARIO"/>"/>
<tag k="phone" v="+34 <xsl:value-of select="TELEFONO"/>"/>
<tag k="fax" v="+34 <xsl:value-of select="FAX"/>"/>
<tag k="addr:street" v="<xsl:value-of select="CLASE-VIAL"/><xsl:text> FIXME </xsl:text><xsl:value-of select="NOMBRE-VIA"/>"/>
<tag k="addr:housenumber" v="<xsl:value-of select="NUM"/>"/>
<tag k="addr:postcode" v="<xsl:value-of select="CODIGO-POSTAL"/>"/>
<tag k="wheelchair" v="<xsl:value-of select="ACCESIBILIDAD"/>"/>
<tag k="url" v="<xsl:value-of select="CONTENT-URL"/>"/>
<tag k="email" v="<xsl:value-of select="EMAIL"/>"/>
<tag k="source" v="Ayuntamiento de Madrid"/>
</node>
</xsl:for-each>
</xsl:template>
</xsl:stylesheet>
' > $OUT_FILE.xsl
# Apply the stylesheet to the cleaned XML. Any previous output is removed
# first, and the closing </osm> tag is appended by hand afterwards.
rm -f $OUT_FILE.osm
xsltproc $OUT_FILE.xsl $OUT_FILE-clean.xml > $OUT_FILE.osm
echo "</osm>" >> $OUT_FILE.osm
# First cleanup pass, rewritten in place through /tmp/foo:
#  - drop lines containing '" v="&' or '" v="+34 &',
#  - drop lines containing '""' (an empty attribute value),
#  - delete the first '<?xml version="1.0"?>' text,
#  - remove a space standing directly before '<?',
#  - delete blank lines.
# NOTE(review): as written, the two perl stages s/</</ and s/>/>/ replace a
# character with itself and change nothing; presumably they were meant to
# turn character entities back into "<" and ">" -- confirm against upstream.
grep -v '" v="&' $OUT_FILE.osm | grep -v '" v="+34 &' | perl -pe 's/</</g' | perl -pe 's/>/>/g' | grep -v '""' | sed '0,/<?xml version="1.0"?>/s///' | perl -pe 's/ <\?/<\?/g' | sed '/^\s*$/d' > /tmp/foo ; mv /tmp/foo $OUT_FILE.osm
# Drop fax and phone tags whose value is only the "+34 " prefix, i.e. entries
# for which the source had no number.
grep -v 'tag k="fax" v="+34 "/>' $OUT_FILE.osm | grep -v 'tag k="phone" v="+34 "/>' > /tmp/foo ; mv /tmp/foo $OUT_FILE.osm
# Map the wheelchair values 1 and 0 to "yes" and "no".
perl -pe 's/tag k="wheelchair" v="1"\/>/tag k="wheelchair" v="yes"\/>/g' $OUT_FILE.osm | perl -pe 's/tag k="wheelchair" v="0"\/>/tag k="wheelchair" v="no"\/>/g' > /tmp/foo ; mv /tmp/foo $OUT_FILE.osm
# Whitespace cleanup and removal of chunks without coordinates:
#  - strip a space at the start of a value (v=" ...),
#  - join the whole file into one line, then break it after every "/node>",
#  - keep only the chunks that still contain "lon=" (this also discards the
#    trailing </osm>, which is why it is appended again below),
#  - break after every ">" and delete blank lines,
#  - the last two stages rewrite <any char><space>" and <space><any char>"
#    into ." (the dot in the pattern is an unescaped regex wildcard).
# NOTE(review): as written, the six identical s/ //g stages delete every
# space in the file and s/ / /g replaces a space with itself; presumably they
# targeted other character sequences that this copy lost -- confirm against
# upstream. The s/\n /\n/g stage runs line by line, so it likely never matches.
perl -pe 's/v=" /v="/g' $OUT_FILE.osm | perl -pe 's/ //g' | perl -pe 's/ //g' | perl -pe 's/ //g' | perl -pe 's/ //g' | perl -pe 's/ //g' | perl -pe 's/ //g' | perl -pe 's/ / /g' | perl -pe 's/\n//g' | perl -pe 's/\/node>/\/node>\n/g' | grep 'lon=' | perl -pe 's/>/>\n/g' | sed '/^\s*$/d' | perl -pe 's/\n /\n/g' | perl -pe 's/. "/."/g' | perl -pe 's/ ."/."/g' > /tmp/foo ; mv /tmp/foo $OUT_FILE.osm
echo "</osm>" >> $OUT_FILE.osm
#######################################
# Title-case a string: lower-case everything, then upper-case the first
# character of every word (a word is a run of letters, digits and "_").
# The marker word "Fixme" is restored to "FIXME" afterwards.
# Arguments: $1 - text to capitalise
# Outputs:   the capitalised text on stdout
#######################################
capitalizar() {
  local texto=${1,,}
  local salida='' previo='' c
  local -i i
  for (( i = 0; i < ${#texto}; i++ )); do
    c=${texto:i:1}
    if [[ $c == [[:alnum:]_] && $previo != [[:alnum:]_] ]]; then
      c=${c^}
    fi
    salida+=$c
    previo=$c
  done
  printf '%s\n' "${salida//Fixme/FIXME}"
}

#######################################
# Post-process one line of the OSM file.
# Whitespace is collapsed first (leading/trailing removed, runs reduced to a
# single space) without any glob expansion, then exactly one rule applies:
#   - line contains "addr:"   -> the value (4th '"'-separated field) is
#                                title-cased; only the value is rewritten and
#                                it is treated as literal text, so values
#                                such as "S/N" or "(DE LA)" are safe.
#   - line contains 'v="+34'  -> phone/fax: keep only the first number of the
#                                form +349######## and print it as
#                                "+34 9########". Lines without such a
#                                number produce no output.
#   - line contains 'k="url"' -> the --FIXME-- placeholder becomes "&amp;",
#                                the escaped form XML requires inside an
#                                attribute value; a placeholder that is
#                                still followed by "amp;" is not escaped
#                                twice.
#   - anything else           -> printed as is.
# Arguments: $1 - one line of the OSM file
# Outputs:   the fixed line on stdout (possibly nothing for phone/fax)
#######################################
corregir_linea() {
  local IFS=$' \t\n'
  local -a palabras campos
  local linea lugar cabecera resto

  read -r -a palabras <<< "$1"
  linea="${palabras[*]}"

  if [[ $linea == *addr:* ]]; then
    IFS='"' read -r -a campos <<< "$linea"
    if (( ${#campos[@]} < 4 )); then
      # No quoted value to capitalise.
      printf '%s\n' "$linea"
      return 0
    fi
    lugar=${campos[3]}
    # Everything up to and including the quote that opens the value.
    cabecera="${campos[0]}\"${campos[1]}\"${campos[2]}\""
    resto=${linea#"$cabecera"}
    resto=${resto#"$lugar"}
    printf '%s%s%s\n' "$cabecera" "$(capitalizar "$lugar")" "$resto"
  elif [[ $linea == *'v="+34'* ]]; then
    # XX marks the end of the first number; everything after it is cut off.
    printf '%s\n' "$linea" \
      | tr -d ' ' \
      | sed -n 's/+349[0-9]\{8\}/&XX/p' \
      | sed 's/XX/"\/>\n/g' \
      | grep 'k=' \
      | sed -e 's/<tagk/<tag k/g' -e 's/+34/+34 /g' -e 's/v=/ v=/g'
  elif [[ $linea == *'k="url"'* ]]; then
    printf '%s\n' "$linea" | sed 's/--FIXME--\(amp;\)\{0,1\}/\&amp;/g'
  else
    printf '%s\n' "$linea"
  fi
}

# Second pass over the OSM file, one line at a time. The result file is
# truncated first so that leftovers from an aborted run are not kept.
while IFS='' read -r line || [[ -n "$line" ]]; do
  corregir_linea "$line"
done < "$OUT_FILE.osm" > "$OUT_FILE-clean.osm"
# --- Final formatting, summary and cleanup -----------------------------------

# Private scratch file instead of the fixed, predictable /tmp/foo.
CUERPO_OSM=$(mktemp) || {
  echo "Error: no se pudo crear un archivo temporal." >&2
  exit 1
}

# Indentation: put a space in front of every <tag, <node and </node>.
perl -pe 's/<tag/ <tag/g; s/<node/ <node/g; s/<\/node>/ <\/node>/g' \
  -- "$OUT_FILE-clean.osm" > "$CUERPO_OSM"

# Write the final file. When the body starts directly with a node, the XML
# declaration and the opening <osm> element are missing and are added first.
{
  if head -n1 -- "$CUERPO_OSM" | grep --quiet "node"; then
    echo '<?xml version="1.0" encoding="UTF-8"?>
<osm version="0.6" generator="madridxml2osm.sh">'
  fi
  cat -- "$CUERPO_OSM"
} > "$OUT_FILE.osm"
rm -f -- "$CUERPO_OSM"

# Listing and stats: print the name of every point followed by "<count> puntos".
# The names are held in an array, so no file called "puntos" is created in
# (or deleted from) the working directory.
mapfile -t NOMBRES < <(grep 'k="name"' -- "$OUT_FILE.osm" | awk -F '"' '{print $4}')
if (( ${#NOMBRES[@]} > 0 )); then
  printf '%s\n' "${NOMBRES[@]}"
fi
echo "${#NOMBRES[@]} puntos"

# Remove the intermediate files; only $OUT_FILE.osm is kept.
rm -- "$OUT_FILE-clean.osm" "$OUT_FILE.xsl" "$OUT_FILE-clean.xml"
exit 0