Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CGMES loading from zipped profiles inside a folder #3309

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
import com.powsybl.cgmes.conversion.naming.NamingStrategyFactory;
import com.powsybl.cgmes.model.*;
import com.powsybl.commons.PowsyblException;
import com.powsybl.commons.compress.SafeZipInputStream;
import com.powsybl.commons.config.PlatformConfig;
import com.powsybl.commons.datasource.CompressionFormat;
import com.powsybl.commons.datasource.DataSource;
import com.powsybl.commons.datasource.DataSourceUtil;
import com.powsybl.commons.datasource.GenericReadOnlyDataSource;
Expand Down Expand Up @@ -45,6 +47,7 @@
import java.util.*;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.zip.ZipInputStream;

import static java.util.function.Predicate.not;

Expand Down Expand Up @@ -303,8 +306,26 @@ private Set<ReadOnlyDataSource> separateByModelingAuthority() {

/**
 * Reads the modeling authority declared in the named entry of the data source.
 * <p>
 * The entry is opened through {@code dataSource}. When the text after the last dot of the name equals the
 * ZIP compression extension, the content is read through a {@link SafeZipInputStream} built with entry
 * number 1 and a limit of 2048 bytes; otherwise the raw stream is parsed directly.
 *
 * @param name name of the entry in the data source
 * @return the modeling authority, or an empty {@code Optional} when the reader returned {@code null}
 * @throws PowsyblException wrapping any {@link IOException} or {@link XMLStreamException}
 */
private Optional<String> readModelingAuthority(String name) {
String modellingAuthority = null;
// NOTE(review): the next two lines look like pre-change lines kept by the diff view; the lines after them read as their replacement - confirm against the merged file
try (InputStream is = dataSource.newInputStream(name)) {
XMLStreamReader reader = xmlInputFactory.createXMLStreamReader(is);
try (InputStream in = dataSource.newInputStream(name)) {
// Text after the last dot; the whole name when it has no dot (lastIndexOf returns -1, so the substring starts at 0)
String fileExtension = name.substring(name.lastIndexOf('.') + 1);
// NOTE(review): equals() is case-sensitive - confirm that names ending in an upper-case ZIP extension cannot reach this point
if (fileExtension.equals(CompressionFormat.ZIP.getExtension())) {
// Entry number 1, at most 2048 bytes handed to the XML reader
try (SafeZipInputStream zis = new SafeZipInputStream(new ZipInputStream(in), 1, 2048)) {
modellingAuthority = readModelingAuthority(zis);
}
} else {
modellingAuthority = readModelingAuthority(in);
}
} catch (IOException | XMLStreamException e) {
throw new PowsyblException(e);
}
return Optional.ofNullable(modellingAuthority);
}

private String readModelingAuthority(InputStream is) throws XMLStreamException {
String modellingAuthority = null;
XMLStreamReader reader = null;
try {
reader = xmlInputFactory.createXMLStreamReader(is);
boolean stopReading = false;
while (reader.hasNext() && !stopReading) {
int token = reader.next();
Expand All @@ -317,11 +338,12 @@ private Optional<String> readModelingAuthority(String name) {
stopReading = true;
}
}
reader.close();
} catch (IOException | XMLStreamException e) {
throw new PowsyblException(e);
} finally {
if (reader != null) {
reader.close();
}
}
return Optional.ofNullable(modellingAuthority);
return modellingAuthority;
}

private Set<ReadOnlyDataSource> separateByIgmName() {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/**
* Copyright (c) 2025, RTE (http://www.rte-france.com)
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
* SPDX-License-Identifier: MPL-2.0
*/
package com.powsybl.cgmes.conversion.test;

import com.google.common.io.ByteStreams;
import com.google.common.jimfs.Configuration;
import com.google.common.jimfs.Jimfs;
import com.powsybl.cgmes.conformity.CgmesConformity1Catalog;
import com.powsybl.cgmes.model.CgmesModelException;
import com.powsybl.commons.datasource.DataSource;
import com.powsybl.commons.datasource.ReadOnlyDataSource;
import com.powsybl.iidm.network.Network;
import org.junit.jupiter.api.Test;

import java.nio.file.FileSystem;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
/**
 * Checks CGMES import from a folder in which profiles are stored as individual zip archives.
 *
 * @author Giovanni Ferrari {@literal <giovanni.ferrari at soft.it>}
 */
class LoadZippedProfilesTest {

    /**
     * Writes every profile of the MicroGrid BE base case into its own zip archive inside an in-memory
     * folder, then checks that a network can be read from that folder.
     */
    @Test
    void oneFolderAndOneZipPerProfileTest() throws Exception {
        var catalogSource = CgmesConformity1Catalog.microGridBaseCaseBE().dataSource();
        Set<String> profileNames = catalogSource.listNames(".*");
        try (FileSystem inMemoryFs = Jimfs.newFileSystem(Configuration.unix())) {
            Path folder = inMemoryFs.getPath("/work");
            for (String profileName : profileNames) {
                // One archive per profile, holding a single entry named after the profile
                Path archive = folder.resolve(profileName + ".zip");
                try (var profileContent = catalogSource.newInputStream(profileName);
                     var zipOut = new ZipOutputStream(Files.newOutputStream(archive))) {
                    zipOut.putNextEntry(new ZipEntry(profileName));
                    ByteStreams.copy(profileContent, zipOut);
                    zipOut.closeEntry();
                }
            }

            Network imported = Network.read(DataSource.fromPath(folder));
            assertNotNull(imported);
        }
    }

    /**
     * Reads a folder that only contains a zip archive without any entry and checks the reported error.
     */
    @Test
    void emptyZipErrorTest() throws Exception {
        try (FileSystem inMemoryFs = Jimfs.newFileSystem(Configuration.unix())) {
            Path folder = inMemoryFs.getPath("/work");
            Path emptyArchive = folder.resolve("empty.zip");
            try (var zipOut = new ZipOutputStream(Files.newOutputStream(emptyArchive))) {
                zipOut.closeEntry();
            }

            ReadOnlyDataSource datasource = DataSource.fromPath(folder);
            CgmesModelException failure = assertThrows(CgmesModelException.class, () -> Network.read(datasource));
            String causeMessage = failure.getCause().getMessage();
            assertEquals("Zip entry index out of bounds: 1", causeMessage);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2017-2018, RTE (http://www.rte-france.com)
* Copyright (c) 2017-2025, RTE (http://www.rte-france.com)
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
Expand All @@ -8,22 +8,25 @@

package com.powsybl.cgmes.model;

import com.powsybl.commons.compress.SafeZipInputStream;
import com.powsybl.commons.datasource.CompressionFormat;
import com.powsybl.commons.datasource.ReadOnlyDataSource;

import javax.xml.stream.XMLStreamException;
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.util.HashSet;
import java.util.Objects;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.zip.ZipInputStream;

import static com.powsybl.cgmes.model.CgmesNamespace.*;

/**
* @author Luma Zamarreño {@literal <zamarrenolm at aia.es>}
*/
public class CgmesOnDataSource {
private static final String LISTING_CGMES_NAMES_IN_DATA_SOURCE = "Listing CGMES names in data source %s";
private static final String EXTENSION = "xml";

public CgmesOnDataSource(ReadOnlyDataSource ds) {
Expand Down Expand Up @@ -87,13 +90,7 @@ private boolean existsNamespacesCim14(Set<String> namespaces) {
public String baseName() {
// Get the base URI if present, else build an absolute URI from the data source base name
return names().stream()
.map(n -> {
try (InputStream is = dataSource.newInputStream(n)) {
return NamespaceReader.base(is);
} catch (IOException x) {
throw new UncheckedIOException(x);
}
})
.map(n -> loadInputStreamAndGetNamespace(n, NamespaceReader::base))
.filter(Objects::nonNull)
.findFirst()
.orElseGet(() -> {
Expand All @@ -109,34 +106,46 @@ public Set<String> names() {
try {
// the set of names may be empty if the data source does not contain CGMES data
Set<String> allNames = dataSource.listNames(REGEX_VALID_NAME);
allNames.removeIf(n -> !containsValidNamespace(n));
allNames.removeIf(n -> !existsInDatasource(n) || !containsValidNamespace(n));
return allNames;
} catch (IOException x) {
throw new CgmesModelException(String.format("Listing CGMES names in data source %s", dataSource), x);
throw new CgmesModelException(String.format(LISTING_CGMES_NAMES_IN_DATA_SOURCE, dataSource), x);
}
}

private boolean containsValidNamespace(String name) {
try (InputStream is = dataSource.newInputStream(name)) {
Set<String> ns = NamespaceReader.namespaces1(is);
return ns.contains(RDF_NAMESPACE) && ns.stream().anyMatch(CgmesNamespace::isValid);
} catch (XMLStreamException e) {
/**
 * Tells whether the data source reports the given file name as existing.
 *
 * @param fileName name looked up in {@code dataSource}
 * @return the result of {@code dataSource.exists(fileName)}, or {@code false} when that call fails with an {@link IOException}
 */
private boolean existsInDatasource(String fileName) {
try {
return dataSource.exists(fileName);
} catch (IOException e) {
// An I/O failure during the check is reported as "does not exist"
return false;
// NOTE(review): the catch clause below looks like a pre-change line kept by the diff view - confirm against the merged file
} catch (IOException x) {
throw new CgmesModelException(String.format("Listing CGMES names in data source %s", dataSource), x);
}
}

public Set<String> namespaces() {
Set<String> ns = new HashSet<>();
names().forEach(n -> {
try (InputStream is = dataSource.newInputStream(n)) {
ns.addAll(NamespaceReader.namespaces(is));
} catch (IOException x) {
throw new UncheckedIOException(x);
/**
 * Opens the named entry of the data source and applies {@code namespaceGetter} to its content.
 * <p>
 * When the text after the last dot of the name equals the ZIP compression extension, the getter receives a
 * {@link SafeZipInputStream} built with entry number 1 and a limit of 1024 bytes; otherwise it receives
 * the raw stream.
 *
 * @param n name of the entry in the data source
 * @param namespaceGetter function applied to the opened stream
 * @param <T> type produced by {@code namespaceGetter}
 * @return the value returned by {@code namespaceGetter}
 * @throws CgmesModelException wrapping any {@link IOException} raised inside the try block
 */
private <T> T loadInputStreamAndGetNamespace(String n, Function<InputStream, T> namespaceGetter) {
try (InputStream in = dataSource.newInputStream(n)) {
// Text after the last dot; the whole name when it has no dot (lastIndexOf returns -1, so the substring starts at 0)
String fileExtension = n.substring(n.lastIndexOf('.') + 1);
// NOTE(review): equals() is case-sensitive - confirm that names ending in an upper-case ZIP extension cannot reach this point
if (fileExtension.equals(CompressionFormat.ZIP.getExtension())) {
// NOTE(review): the 1024-byte limit counts every byte handed to the getter; confirm it is large enough if the XML reader buffers ahead
try (SafeZipInputStream zis = new SafeZipInputStream(new ZipInputStream(in), 1, 1024)) {
return namespaceGetter.apply(zis);
}
} else {
return namespaceGetter.apply(in);
}
// NOTE(review): the next two lines look like pre-change lines kept by the diff view - confirm against the merged file
});
return ns;
} catch (IOException e) {
throw new CgmesModelException(String.format(LISTING_CGMES_NAMES_IN_DATA_SOURCE, dataSource), e);
}
}

/**
 * Tells whether the named entry declares the RDF namespace together with at least one namespace
 * accepted by {@code CgmesNamespace.isValid}.
 *
 * @param name name of the entry in the data source
 * @return {@code true} when both conditions hold
 */
private boolean containsValidNamespace(String name) {
    Set<String> declared = loadInputStreamAndGetNamespace(name, NamespaceReader::namespacesOrEmpty);
    if (!declared.contains(RDF_NAMESPACE)) {
        return false;
    }
    for (String namespace : declared) {
        if (CgmesNamespace.isValid(namespace)) {
            return true;
        }
    }
    return false;
}

/**
 * Collects the namespaces read from every name returned by {@code names()}.
 *
 * @return the union of the namespaces found in all entries
 */
public Set<String> namespaces() {
    Set<String> collected = new HashSet<>();
    for (String name : names()) {
        Set<String> found = loadInputStreamAndGetNamespace(name, NamespaceReader::namespaces);
        collected.addAll(found);
    }
    return collected;
}

public String cimNamespace() {
Expand All @@ -154,5 +163,5 @@ public String cimNamespace() {
// Any number of characters from the start
+ "^.*"
// Ending with extension .xml or .zip
+ "\\.XML$";
+ "\\.(XML|ZIP)$";
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,15 @@ public static Set<String> namespaces(InputStream is) {
}
}

public static Set<String> namespaces1(InputStream is) throws XMLStreamException {
/**
 * Reads the namespaces from the stream, falling back to an empty set when the XML cannot be read.
 *
 * @param is stream to read from
 * @return the namespaces found, or an empty immutable set when an {@link XMLStreamException} is raised
 */
public static Set<String> namespacesOrEmpty(InputStream is) {
    Set<String> result;
    try {
        result = namespaces1(is);
    } catch (XMLStreamException ignored) {
        // Content that fails XML parsing is treated as declaring no namespace
        result = Set.of();
    }
    return result;
}

private static Set<String> namespaces1(InputStream is) throws XMLStreamException {
Set<String> found = new HashSet<>();
XMLStreamReader xmlsr = XML_INPUT_FACTORY_SUPPLIER.get().createXMLStreamReader(is);
try {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/**
* Copyright (c) 2025, RTE (http://www.rte-france.com)
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
* SPDX-License-Identifier: MPL-2.0
*/
package com.powsybl.commons.compress;

import java.io.IOException;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

import com.powsybl.commons.io.ForwardingInputStream;

public class SafeZipInputStream extends ForwardingInputStream<ZipInputStream> {

private int bytesRead;
private int maxBytesToRead;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We might want to go further than 2GB, so an int is not enough.

Suggested change
private int bytesRead;
private int maxBytesToRead;
private long bytesRead;
private long maxBytesToRead;


/**
 * Wraps a zip stream and advances it to the requested entry.
 * <p>
 * {@code getNextEntry()} is called {@code entryNumber} times, so a value of 1 leaves the stream
 * positioned on the first entry and a non-positive value calls it zero times.
 *
 * @param in the zip stream to read from
 * @param entryNumber number of entries to advance through, counted from 1
 * @param maxBytesToRead limit stored for comparison against the running byte count
 * @throws IOException with message "Zip entry index out of bounds" when the archive has fewer than
 *         {@code entryNumber} entries, or when moving to an entry fails
 */
public SafeZipInputStream(ZipInputStream in, int entryNumber, int maxBytesToRead) throws IOException {
super(in);
this.maxBytesToRead = maxBytesToRead;
// Advance one entry per iteration; a null entry means the archive ended before the requested index
for (int i = 0; i < entryNumber; i++) {
ZipEntry zipEntry = in.getNextEntry();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We might want to reuse this for other usecases where several ZipInputStream entries have to be read. This is not really easy to use that way, I think you'd need to create a new SafeZipInputStream(zin, 1, max) for each entry?

Adding a SafeZipInputStream::getNextEntry method would do the trick, at the cost of adding a protected T getDelegate() in ForwardingInputStream.

if (zipEntry == null) {
throw new IOException(String.format("Zip entry index out of bounds: %s", entryNumber));
}
}
}

/**
 * Reads a single byte and counts it against the byte limit.
 *
 * @return the byte read, or -1 at end of stream
 * @throws IOException with message "Max bytes to read exceeded" once the running count passes the limit
 */
@Override
public int read() throws IOException {
    final int value = super.read();
    if (value == -1) {
        // End of stream: nothing to count
        return value;
    }
    this.bytesRead++;
    if (this.bytesRead > this.maxBytesToRead) {
        throw new IOException("Max bytes to read exceeded");
    }
    return value;
}

@Override
public int read(byte[] b, int off, int len) throws IOException {
int byteRead = super.read(b, off, len);
if (byteRead != -1 && (this.bytesRead + byteRead) > this.maxBytesToRead) {
throw new IOException("Max bytes to read exceeded");
}
this.bytesRead += byteRead;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

to avoid doing twice the addition include this in an if (byteRead + -1) before comparing to the max

return byteRead;
}
}
Loading