Skip to content

Commit

Permalink
Fix 1_downloadOsmDataToFiles task.
Browse files Browse the repository at this point in the history
The download Osm data task worked incorrectly in the step where the geojson is written to a file, resulting in an incorrectly formatted geojson.
This commit changes what is written to the file so that the json is properly formatted and has closing brackets in the right place.
We also put the stream writing in a promise and add a check that pauses the reading when the writing isn't done yet, so the two stream remain in sync when writing a large file.
Finally, we also replace the promisify(pipeline) with just a pipeline that comes from the node:stream/promises module, which does the same thing for less lines of code.
  • Loading branch information
GabrielBruno24 authored and tahini committed Jan 21, 2025
1 parent ccb4d45 commit e37c13c
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 16 deletions.
56 changes: 40 additions & 16 deletions packages/chaire-lib-common/src/utils/osm/OsmOverpassDownloader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@
import fetch from 'node-fetch';
import osmToGeojson from 'osmtogeojson';
import fs from 'fs';
import { pipeline } from 'node:stream';
import { promisify } from 'node:util';
import { pipeline } from 'node:stream/promises';
import JSONStream from 'JSONStream';

import { geojsonToPolyBoundary } from '../geometry/ConversionUtils';
Expand Down Expand Up @@ -155,24 +154,50 @@ class OsmOverpassDownloaderImpl implements OsmOverpassDownloader {
overpassXmlQueryString: string = allWaysAndRelationsXmlQuery
): Promise<boolean> {
const response = await this.downloadData(boundPoly, overpassXmlQueryString, 'json');
const streamPipeline = promisify(pipeline);

console.log('Writing osm geojson data to ' + filename);

// Inspired by a conversation with ChatGPT :)
const writeStream = fs.createWriteStream(filename);
writeStream.write('{"type":"FeatureCollection","features":['); // Put all the data in one big feature collection
const jsonStream = JSONStream.parse('elements.*');
jsonStream.on('data', (element) => {
const geojsonData = osmToGeojson({ elements: [element] });
writeStream.write(JSON.stringify(geojsonData));
});
jsonStream.on('end', () => {
writeStream.end();
});
await streamPipeline(response.body, jsonStream);
console.log('Done writing osm geojson data');
let firstIteration = true;

return true;
//Encapsulating the logic of the streams in a promise allows for each event to happen when expected, particularly when writing a large file.
return new Promise((resolve, reject) => {
//If there is an error when reading the json object, reject the promise.
jsonStream.on('error', (e) => {
console.error(e);
reject(e);
});

jsonStream.on('data', (element) => {
const geojsonData = osmToGeojson({ elements: [element] });
if (geojsonData.features.length > 0) {
const featuresString = JSON.stringify(geojsonData.features).slice(1, -1); // Remove the opening and closing brackets
const dataOk = writeStream.write((firstIteration ? '' : ',') + featuresString); //Write the comma first (except on the first iteration) so that there will be no trailing comma.

if (!dataOk) {
// If writeStream hasn't finished writing, pause the jsonStream to let it catch up
jsonStream.pause();
writeStream.once('drain', () => {
jsonStream.resume();
});
}

firstIteration = false;
}
});

jsonStream.on('end', () => {
// Write the closing brackets
writeStream.end(']}', () => {
console.log('Done writing osm geojson data');
resolve(true);
});
});

pipeline(response.body, jsonStream); //Pipes the response's body to the jsonStream, and executes the streams' logic.
});
}
/**
* Download data from openstreetmap API, and return an xml string of the result
Expand Down Expand Up @@ -227,9 +252,8 @@ class OsmOverpassDownloaderImpl implements OsmOverpassDownloader {
): Promise<boolean> {
const response = await this.downloadData(boundPoly, overpassXmlQueryString, fileType);
// Taken from fetch-node documentation
const streamPipeline = promisify(pipeline);
console.log('Writing osm data to ' + filename);
await streamPipeline(response.body, fs.createWriteStream(filename));
await pipeline(response.body, fs.createWriteStream(filename));
console.log('Done writing osm data');
return true;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import OsmOverpassDownloader from '../OsmOverpassDownloader';
import GeoJSON from 'geojson';
import fetch from 'node-fetch';
import { Readable, Writable } from 'node:stream';
import fs from 'fs';

jest.mock('node-fetch', () => jest.fn());
const mockedFetch = fetch as jest.MockedFunction<typeof fetch>;
Expand Down Expand Up @@ -100,6 +102,29 @@ const jsonData = {
]
};

const geojsonWritten = {
"type":"FeatureCollection",
"features":[
{
"type":"Feature",
"id":"node/123",
"properties":{"timestamp":"2020-02-08T17:16:30Z","version":1,"changeset":1,"user":"osmUser","uid":1,"id":"node/123"},
"geometry":{"type":"Point","coordinates":[-73.9678132,45.3941161]}
},
{
"type":"Feature",
"id":"node/234",
"properties":{"timestamp":"2020-02-08T17:16:30Z","version":1,"changeset":1,"user":"osmUser","uid":1,"id":"node/234"},
"geometry":{"type":"Point","coordinates":[-73.9544677,45.3752717]}
},
{
"type":"Feature",
"id":"node/345",
"properties":{"timestamp":"2020-02-08T17:16:30Z","version":1,"changeset":1,"user":"osmUser","uid":1,"id":"node/345"},
"geometry":{"type":"Point","coordinates":[-73.9545144,45.3751139]}
}
]};

const xmlData = `<?xml version="1.0" encoding="UTF-8"?>
<osm version="0.6" generator="Overpass API 0.7.56.8 7d656e78">
<note>The data included in this document is from www.openstreetmap.org. The data is made available under ODbL.</note>
Expand Down Expand Up @@ -205,3 +230,36 @@ test('download xml data from overpass', async () => {
expect(xmlContent).toEqual(xmlData);

});

test('fetch and write geojson', async () => {
let mockWriteStream;
let writtenData = '';
let streamFilename;
mockWriteStream = new Writable({
write(chunk, _encoding, callback) {
writtenData += chunk.toString();
callback();
}
});

jest.spyOn(fs, 'createWriteStream').mockImplementation((path) => {
streamFilename = path;
return mockWriteStream;
});

const streamBody = new Readable();
streamBody.push(JSON.stringify(jsonData));
streamBody.push(null);
const response = Promise.resolve({
ok: true,
status: 200,
body: streamBody
});
mockedFetch.mockResolvedValue(response);

const writeIsSuccessful = await OsmOverpassDownloader.fetchAndWriteGeojson('./test.json', geojsonBoundaryPolygon);
expect(writeIsSuccessful).toBeTruthy();
expect(mockedFetch).toHaveBeenCalledTimes(1);
expect(writtenData).toBe(JSON.stringify(geojsonWritten));
expect(streamFilename).toBe('./test.json');
});

0 comments on commit e37c13c

Please sign in to comment.