Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CGMES loading from zipped profiles inside a folder #3309

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
import com.powsybl.cgmes.conversion.naming.NamingStrategyFactory;
import com.powsybl.cgmes.model.*;
import com.powsybl.commons.PowsyblException;
import com.powsybl.commons.compress.ZipSecurityHelper;
import com.powsybl.commons.config.PlatformConfig;
import com.powsybl.commons.datasource.CompressionFormat;
import com.powsybl.commons.datasource.DataSource;
import com.powsybl.commons.datasource.DataSourceUtil;
import com.powsybl.commons.datasource.GenericReadOnlyDataSource;
Expand Down Expand Up @@ -45,6 +47,8 @@
import java.util.*;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

import static java.util.function.Predicate.not;

Expand Down Expand Up @@ -303,8 +307,31 @@ private Set<ReadOnlyDataSource> separateByModelingAuthority() {

private Optional<String> readModelingAuthority(String name) {
String modellingAuthority = null;
try (InputStream is = dataSource.newInputStream(name)) {
XMLStreamReader reader = xmlInputFactory.createXMLStreamReader(is);
try (InputStream in = dataSource.newInputStream(name)) {
String fileExtension = name.substring(name.lastIndexOf('.') + 1);
if (fileExtension.equals(CompressionFormat.ZIP.getExtension())) {
ZipSecurityHelper.checkIfZipExtractionIsSafe(dataSource, name);
try (ZipInputStream zis = new ZipInputStream(in)) {
ZipEntry zipEntry = zis.getNextEntry();
if (zipEntry == null) {
throw new IOException("No entry found in zip file " + name);
}
modellingAuthority = readModelingAuthority(zis);
}
} else {
modellingAuthority = readModelingAuthority(in);
}
} catch (IOException | XMLStreamException e) {
throw new PowsyblException(e);
}
return Optional.ofNullable(modellingAuthority);
}

private String readModelingAuthority(InputStream is) throws XMLStreamException {
String modellingAuthority = null;
XMLStreamReader reader = null;
try {
reader = xmlInputFactory.createXMLStreamReader(is);
boolean stopReading = false;
while (reader.hasNext() && !stopReading) {
int token = reader.next();
Expand All @@ -317,11 +344,12 @@ private Optional<String> readModelingAuthority(String name) {
stopReading = true;
}
}
reader.close();
} catch (IOException | XMLStreamException e) {
throw new PowsyblException(e);
} finally {
if (reader != null) {
reader.close();
}
}
return Optional.ofNullable(modellingAuthority);
return modellingAuthority;
}

private Set<ReadOnlyDataSource> separateByIgmName() {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/**
* Copyright (c) 2025, RTE (http://www.rte-france.com)
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
* SPDX-License-Identifier: MPL-2.0
*/
package com.powsybl.cgmes.conversion.test;

import com.google.common.io.ByteStreams;
import com.google.common.jimfs.Configuration;
import com.google.common.jimfs.Jimfs;
import com.powsybl.cgmes.conformity.CgmesConformity1Catalog;
import com.powsybl.cgmes.model.CgmesModelException;
import com.powsybl.commons.datasource.DataSource;
import com.powsybl.commons.datasource.ReadOnlyDataSource;
import com.powsybl.iidm.network.Network;
import org.junit.jupiter.api.Test;

import java.nio.file.FileSystem;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
/**
* @author Giovanni Ferrari {@literal <giovanni.ferrari at soft.it>}
*/
class LoadZippedProfilesTest {

@Test
void oneFolderAndOneZipPerProfileTest() throws Exception {
    // Source of the reference profiles: every name exposed by the conformity data source
    var conformityDataSource = CgmesConformity1Catalog.microGridBaseCaseBE().dataSource();
    Set<String> profileNames = conformityDataSource.listNames(".*");

    try (FileSystem inMemoryFs = Jimfs.newFileSystem(Configuration.unix())) {
        Path folder = inMemoryFs.getPath("/work");

        // Write one single-entry archive "<profile>.zip" per profile into the folder
        for (String profileName : profileNames) {
            Path archive = folder.resolve(profileName + ".zip");
            try (var profileContent = conformityDataSource.newInputStream(profileName);
                 var zip = new ZipOutputStream(Files.newOutputStream(archive))) {
                zip.putNextEntry(new ZipEntry(profileName));
                ByteStreams.copy(profileContent, zip);
                zip.closeEntry();
            }
        }

        // The folder of zipped profiles must be importable as a network
        assertNotNull(Network.read(DataSource.fromPath(folder)));
    }
}

@Test
void emptyZipErrorTest() throws Exception {
var testDataSource = CgmesConformity1Catalog.microGridBaseCaseBE().dataSource();
Set<String> profiles = testDataSource.listNames(".*");
try (FileSystem fileSystem = Jimfs.newFileSystem(Configuration.unix())) {
// copy and compress each of the profile to the file system
Path workDir = fileSystem.getPath("/work");
for (String profile : profiles) {
try (var is = testDataSource.newInputStream(profile);
var os = new ZipOutputStream(Files.newOutputStream(workDir.resolve(profile + ".zip")))) {
os.closeEntry();
}
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// copy and compress each of the profile to the file system
Path workDir = fileSystem.getPath("/work");
for (String profile : profiles) {
try (var is = testDataSource.newInputStream(profile);
var os = new ZipOutputStream(Files.newOutputStream(workDir.resolve(profile + ".zip")))) {
os.closeEntry();
}
}
Path workDir = fileSystem.getPath("/work");
for (String profile : profiles) {
try (var os = new ZipOutputStream(Files.newOutputStream(workDir.resolve(profile + ".zip")))) {
os.closeEntry();
}
}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In emptyZipErrorTest(), there's no need to create an input stream for each profile since they are not read.


ReadOnlyDataSource datasource = DataSource.fromPath(workDir);
CgmesModelException ex = assertThrows(CgmesModelException.class, () -> Network.read(datasource));
assertEquals("No entry found in zip file MicroGridTestConfiguration_BC_BE_DL_V2.xml.zip", ex.getCause().getMessage());
}
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2017-2018, RTE (http://www.rte-france.com)
* Copyright (c) 2017-2025, RTE (http://www.rte-france.com)
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
Expand All @@ -8,22 +8,26 @@

package com.powsybl.cgmes.model;

import com.powsybl.commons.compress.ZipSecurityHelper;
import com.powsybl.commons.datasource.CompressionFormat;
import com.powsybl.commons.datasource.ReadOnlyDataSource;

import javax.xml.stream.XMLStreamException;
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.util.HashSet;
import java.util.Objects;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

import static com.powsybl.cgmes.model.CgmesNamespace.*;

/**
* @author Luma Zamarreño {@literal <zamarrenolm at aia.es>}
*/
public class CgmesOnDataSource {
private static final String LISTING_CGMES_NAMES_IN_DATA_SOURCE = "Listing CGMES names in data source %s";
private static final String EXTENSION = "xml";

public CgmesOnDataSource(ReadOnlyDataSource ds) {
Expand Down Expand Up @@ -87,13 +91,7 @@ private boolean existsNamespacesCim14(Set<String> namespaces) {
public String baseName() {
// Get the base URI if present, else build an absolute URI from the data source base name
return names().stream()
.map(n -> {
try (InputStream is = dataSource.newInputStream(n)) {
return NamespaceReader.base(is);
} catch (IOException x) {
throw new UncheckedIOException(x);
}
})
.map(n -> loadInputStreamAndGetNamespace(n, NamespaceReader::base))
.filter(Objects::nonNull)
.findFirst()
.orElseGet(() -> {
Expand All @@ -109,34 +107,51 @@ public Set<String> names() {
try {
// the set of names may be empty if the data source does not contain CGMES data
Set<String> allNames = dataSource.listNames(REGEX_VALID_NAME);
allNames.removeIf(n -> !containsValidNamespace(n));
allNames.removeIf(n -> !existsInDatasource(n) || !containsValidNamespace(n));
return allNames;
} catch (IOException x) {
throw new CgmesModelException(String.format("Listing CGMES names in data source %s", dataSource), x);
throw new CgmesModelException(String.format(LISTING_CGMES_NAMES_IN_DATA_SOURCE, dataSource), x);
}
}

private boolean containsValidNamespace(String name) {
try (InputStream is = dataSource.newInputStream(name)) {
Set<String> ns = NamespaceReader.namespaces1(is);
return ns.contains(RDF_NAMESPACE) && ns.stream().anyMatch(CgmesNamespace::isValid);
} catch (XMLStreamException e) {
private boolean existsInDatasource(String fileName) {
try {
return dataSource.exists(fileName);
} catch (IOException e) {
return false;
} catch (IOException x) {
throw new CgmesModelException(String.format("Listing CGMES names in data source %s", dataSource), x);
}
}

public Set<String> namespaces() {
Set<String> ns = new HashSet<>();
names().forEach(n -> {
try (InputStream is = dataSource.newInputStream(n)) {
ns.addAll(NamespaceReader.namespaces(is));
} catch (IOException x) {
throw new UncheckedIOException(x);
private <T> T loadInputStreamAndGetNamespace(String n, Function<InputStream, T> namespaceGetter) {
try (InputStream in = dataSource.newInputStream(n)) {
String fileExtension = n.substring(n.lastIndexOf('.') + 1);
if (fileExtension.equals(CompressionFormat.ZIP.getExtension())) {
ZipSecurityHelper.checkIfZipExtractionIsSafe(dataSource, n);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a performance issue here. We're reading the whole unzipped file to detect whether the zip extraction is safe, but we only need the first tag (for the namespace definitions or the base attribute). I think we need to do something smarter to safely read only the first characters, without unzipping the complete file.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I propose checking the safety of the first zip entry only, looking just at its compression ratio.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think a simpler way is to use a SafeZipInputStream class wrapping the ZipInputStream instead of the ZipInputStream itself:

public class SafeZipInputStream extends ForwardingInputStream<ZipInputStream> {
    @Override
    public int read() throws IOException {
        int byteRead = super.read();
        if (byteRead != -1 && this.bytesRead++ > this.maxBytesToRead) {
          throw new IOException();
        }
        return byteRead;
    }

    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        ... // similar way
    }
}

That way you'll only read the first lines instead of unzipping the full file, which might be more than 1GB uncompressed. Besides, we could restrict maxBytesToRead a lot in this PR (a few kB, or 1MB?), as we only read the first tag anyway (we stop at the first START_ELEMENT in the 3 use cases).

try (ZipInputStream zis = new ZipInputStream(in)) {
ZipEntry zipEntry = zis.getNextEntry();
if (zipEntry == null) {
throw new IOException("No entry found in zip file " + n);
}
return namespaceGetter.apply(zis);
}
} else {
return namespaceGetter.apply(in);
}
});
return ns;
} catch (IOException e) {
throw new CgmesModelException(String.format(LISTING_CGMES_NAMES_IN_DATA_SOURCE, dataSource), e);
}
}

/**
 * Tells whether the given data source entry declares the RDF namespace and at least one
 * namespace accepted by {@code CgmesNamespace.isValid}.
 *
 * @param name name of the entry in the data source
 * @return {@code true} if both conditions hold, {@code false} otherwise
 */
private boolean containsValidNamespace(String name) {
    Set<String> declaredNamespaces = loadInputStreamAndGetNamespace(name, NamespaceReader::namespacesOrEmpty);
    if (!declaredNamespaces.contains(RDF_NAMESPACE)) {
        return false;
    }
    return declaredNamespaces.stream().anyMatch(CgmesNamespace::isValid);
}

/**
 * Collects the namespaces declared by every name returned by {@code names()}.
 *
 * @return the union of the namespaces read from each entry
 */
public Set<String> namespaces() {
    return names().stream()
            .flatMap(entryName -> {
                Set<String> declared = loadInputStreamAndGetNamespace(entryName, NamespaceReader::namespaces);
                return declared.stream();
            })
            .collect(Collectors.toSet());
}

public String cimNamespace() {
Expand All @@ -154,5 +169,5 @@ public String cimNamespace() {
// Any number of characters from the start
+ "^.*"
// Ending with extension .xml
+ "\\.XML$";
+ "\\.(XML|ZIP)$";
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,15 @@ public static Set<String> namespaces(InputStream is) {
}
}

public static Set<String> namespaces1(InputStream is) throws XMLStreamException {
/**
 * Reads the namespaces from the given stream, falling back to an empty set when the
 * content cannot be parsed as XML.
 *
 * @param is stream to read from
 * @return the namespaces found, or an empty set if an {@link XMLStreamException} occurs
 */
public static Set<String> namespacesOrEmpty(InputStream is) {
    Set<String> result;
    try {
        result = namespaces1(is);
    } catch (XMLStreamException ignored) {
        // Deliberate best effort: unparsable content simply declares no namespace
        result = Set.of();
    }
    return result;
}

private static Set<String> namespaces1(InputStream is) throws XMLStreamException {
Set<String> found = new HashSet<>();
XMLStreamReader xmlsr = XML_INPUT_FACTORY_SUPPLIER.get().createXMLStreamReader(is);
try {
Expand Down
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed as it's not needed anymore (in this PR at least!) we should not keep it

Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/**
* Copyright (c) 2025, RTE (http://www.rte-france.com)
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
* SPDX-License-Identifier: MPL-2.0
*/
package com.powsybl.commons.compress;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

import com.powsybl.commons.datasource.ReadOnlyDataSource;

/**
 * Checks that a zip archive can be extracted safely, i.e. that it does not look like a
 * "zip bomb".
 * <p>
 * An archive is considered unsafe when it holds more entries than allowed, when the total
 * number of uncompressed bytes exceeds the allowed size, or when the ratio between the
 * uncompressed and the compressed size of one entry exceeds the allowed ratio.
 * The check inflates the entries to count their real size instead of trusting the sizes
 * declared in the archive.
 *
 * @author Giovanni Ferrari {@literal <giovanni.ferrari at soft.it>}
 */
public final class ZipSecurityHelper {

    /** Default maximum number of entries accepted in an archive. */
    public static final int THRESHOLD_ENTRIES = 10000;
    /** Default maximum total uncompressed size of an archive, in bytes. */
    public static final int THRESHOLD_SIZE = 1000000000; // 1 GB
    /** Default maximum uncompressed/compressed size ratio accepted for one entry. */
    public static final double THRESHOLD_RATIO = 10;

    /** Size of the buffer used to inflate the entries. */
    private static final int BUFFER_SIZE = 2048;

    private ZipSecurityHelper() {
    }

    /**
     * Checks the zip resource {@code name} of the data source against the default thresholds.
     *
     * @param dataSource data source holding the zip resource
     * @param name name of the zip resource in the data source
     * @throws UncheckedIOException if the archive is not safe or cannot be read
     */
    public static void checkIfZipExtractionIsSafe(ReadOnlyDataSource dataSource, String name) {
        checkIfZipExtractionIsSafe(dataSource, name, THRESHOLD_ENTRIES, THRESHOLD_SIZE, THRESHOLD_RATIO);
    }

    /**
     * Checks the zip resource {@code name} of the data source against the given thresholds.
     * A dedicated input stream is opened on the resource and closed before returning.
     *
     * @param dataSource data source holding the zip resource
     * @param name name of the zip resource in the data source
     * @param thresholdEntries maximum number of entries accepted
     * @param thresholdSize maximum total uncompressed size accepted, in bytes
     * @param thresholdCompressionRatio maximum uncompressed/compressed ratio accepted for one entry
     * @throws UncheckedIOException if the archive is not safe or cannot be read
     */
    public static void checkIfZipExtractionIsSafe(ReadOnlyDataSource dataSource, String name, int thresholdEntries, int thresholdSize, double thresholdCompressionRatio) {
        try (ZipInputStream is = new ZipInputStream(dataSource.newInputStream(name))) {
            if (!ZipSecurityHelper.isZipFileSafe(is, thresholdEntries, thresholdSize, thresholdCompressionRatio)) {
                // The cause names the offending resource so that the failure can be traced back to it
                throw new UncheckedIOException("Zip file extraction is not safe",
                        new IOException("Unsafe zip content detected in " + name));
            }
        } catch (IOException x) {
            throw new UncheckedIOException(x);
        }
    }

    /**
     * Checks the content of the given zip stream against the default thresholds.
     *
     * @param zipInputStream stream positioned before the first entry; it is consumed but not closed
     * @return {@code true} if no threshold is exceeded
     * @throws IOException if the stream cannot be read
     */
    public static boolean isZipFileSafe(ZipInputStream zipInputStream) throws IOException {
        return isZipFileSafe(zipInputStream, THRESHOLD_ENTRIES, THRESHOLD_SIZE, THRESHOLD_RATIO);
    }

    /**
     * Checks the content of the given zip stream against the given thresholds.
     * Reading stops as soon as one threshold is exceeded, so an unsafe archive is never
     * inflated much beyond {@code thresholdSize} bytes.
     *
     * @param zipInputStream stream positioned before the first entry; it is consumed but not closed
     * @param thresholdEntries maximum number of entries accepted
     * @param thresholdSize maximum total uncompressed size accepted, in bytes
     * @param thresholdCompressionRatio maximum uncompressed/compressed ratio accepted for one entry
     * @return {@code true} if no threshold is exceeded, {@code false} otherwise
     * @throws IOException if the stream cannot be read
     */
    public static boolean isZipFileSafe(ZipInputStream zipInputStream, int thresholdEntries, int thresholdSize, double thresholdCompressionRatio) throws IOException {
        // long counters: an int would silently wrap around for entries larger than 2 GiB
        long totalSizeArchive = 0;
        int totalEntryArchive = 0;
        byte[] buffer = new byte[BUFFER_SIZE];

        ZipEntry ze = zipInputStream.getNextEntry();
        while (ze != null) {
            totalEntryArchive++;
            if (totalEntryArchive > thresholdEntries) {
                // too many entries in this archive, can lead to inodes exhaustion of the system
                return false;
            }

            long totalSizeEntry = 0;
            int nBytes;
            while ((nBytes = zipInputStream.read(buffer)) > 0) {
                totalSizeEntry += nBytes;
                totalSizeArchive += nBytes;
                if (totalSizeArchive > thresholdSize) {
                    // the uncompressed data size is too much for the application resource capacity;
                    // checked while reading so that the rest of the entry is not inflated
                    return false;
                }
            }

            // Move to the next entry before querying the compressed size of the current one:
            // for some entries that size may only be known once the entry has been consumed
            ZipEntry currentEntry = ze;
            ze = zipInputStream.getNextEntry();
            if (isCompressionRatioExceeded(totalSizeEntry, currentEntry.getCompressedSize(), thresholdCompressionRatio)) {
                // ratio between compressed and uncompressed data is highly suspicious, looks
                // like a Zip Bomb Attack
                return false;
            }
        }
        return true;
    }

    /**
     * Tells whether the uncompressed/compressed ratio of one entry is above the threshold.
     * With a non-negative threshold, an unknown compressed size (-1) gives a negative ratio
     * and an empty entry (0/0) gives NaN; neither compares as greater than the threshold.
     */
    private static boolean isCompressionRatioExceeded(long uncompressedSize, long compressedSize, double thresholdCompressionRatio) {
        double compressionRatio = (double) uncompressedSize / (double) compressedSize;
        return compressionRatio > thresholdCompressionRatio;
    }
}
Loading