Skip to content

Commit

Permalink
fix: duckdb extensions packaging
Browse files Browse the repository at this point in the history
  • Loading branch information
Nolife999 committed Mar 22, 2024
1 parent fe15440 commit fc79ca6
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 21 deletions.
14 changes: 10 additions & 4 deletions arc-utils/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -62,14 +62,20 @@
</goals>
<configuration>
<target>
<mkdir dir="${basedir}/src/main/resources/duckdb/v${project.duckdb.version}/windows_amd64/"/>
<gunzip dest="${basedir}/src/main/resources/duckdb/v${project.duckdb.version}/windows_amd64/postgres_scanner.duckdb_extension">
<delete dir="${basedir}/src/main/resources/duckdb"></delete>
<delete dir="${basedir}/src/main/resources/duckdbtmp"></delete>
<mkdir dir="${basedir}/src/main/resources/duckdbtmp/v${project.duckdb.version}/windows_amd64/"/>
<gunzip dest="${basedir}/src/main/resources/duckdbtmp/v${project.duckdb.version}/windows_amd64/postgres_scanner.duckdb_extension">
<url url="http://extensions.duckdb.org/v${project.duckdb.version}/windows_amd64/postgres_scanner.duckdb_extension.gz" />
</gunzip>
<mkdir dir="${basedir}/src/main/resources/duckdb/v${project.duckdb.version}/linux_amd64/"/>
<gunzip dest="${basedir}/src/main/resources/duckdb/v${project.duckdb.version}/linux_amd64/postgres_scanner.duckdb_extension">
<mkdir dir="${basedir}/src/main/resources/duckdbtmp/v${project.duckdb.version}/linux_amd64/"/>
<gunzip dest="${basedir}/src/main/resources/duckdbtmp/v${project.duckdb.version}/linux_amd64/postgres_scanner.duckdb_extension">
<url url="http://extensions.duckdb.org/v${project.duckdb.version}/linux_amd64/postgres_scanner.duckdb_extension.gz" />
</gunzip>
<mkdir dir="${basedir}/src/main/resources/duckdb"/>
<zip basedir="${basedir}/src/main/resources/duckdbtmp" destfile="${basedir}/src/main/resources/duckdb/extensions.zip">
</zip>
<delete dir="${basedir}/src/main/resources/duckdbtmp"></delete>
</target>
</configuration>
</execution>
Expand Down
64 changes: 47 additions & 17 deletions arc-utils/src/main/java/fr/insee/arc/utils/parquet/ParquetDao.java
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
package fr.insee.arc.utils.parquet;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.List;

import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.springframework.core.io.ClassPathResource;
Expand All @@ -20,6 +24,8 @@
import fr.insee.arc.utils.database.TableToRetrieve;
import fr.insee.arc.utils.exception.ArcException;
import fr.insee.arc.utils.exception.ArcExceptionMessage;
import fr.insee.arc.utils.files.CompressedUtils;
import fr.insee.arc.utils.files.FileUtilsArc;
import fr.insee.arc.utils.ressourceUtils.ConnectionAttribute;
import fr.insee.arc.utils.ressourceUtils.PropertiesHandler;
import fr.insee.arc.utils.utils.FormatSQL;
Expand All @@ -28,12 +34,21 @@ public class ParquetDao {

private static final String ATTACHMENT_NAME_PREFIX = "pg";

// classpath file containing extension files
private static final String DUCKDB_EXTENSION_PROVIDED_FILE = "duckdb/extensions.zip";

// directory where extension will be unzip and used by duckdb
private static final String DUCKDB_EXTENSION_INSTALLATION_DIRECTORY = "./duckdb/";

public static void exportToParquet(List<TableToRetrieve> tables, String outputDirectory,
ParquetEncryptionKey encryptionKey) throws ArcException {

loadDuckdb();

try (Connection connection = DriverManager.getConnection("jdbc:duckdb:")) {

unzipDuckdbPostgresExtensions();

attachPostgresDatabasesToDuckdb(connection, encryptionKey);

// exporter la liste des tables en parquet
Expand All @@ -42,8 +57,6 @@ public static void exportToParquet(List<TableToRetrieve> tables, String outputDi
}

} catch (SQLException | IOException e) {
System.out.println("§§§§§");
System.out.println(ExceptionUtils.getStackTrace(e));
throw new ArcException(ArcExceptionMessage.DATABASE_CONNECTION_FAILED);
}

Expand Down Expand Up @@ -96,28 +109,16 @@ private static void attachPostgresDatabasesToDuckdb(Connection connection, Parqu
PropertiesHandler properties = PropertiesHandler.getInstance();
int numberOfPods = properties.getConnectionProperties().size();

File folder = new ClassPathResource("duckdb").getFile();

File target = new File("./duckdb");
FileUtils.copyDirectory(folder, target);
String path=target.getAbsolutePath();

System.out.println("§§§§§");
System.out.println(path);

GenericPreparedStatementBuilder query = new GenericPreparedStatementBuilder();
query.append("SET custom_extension_repository = " + query.quoteText(path) + ";\n");
query.append("SET extension_directory = " + query.quoteText(path) + ";\n");
query.append("SET custom_extension_repository = " + query.quoteText(DUCKDB_EXTENSION_INSTALLATION_DIRECTORY) + ";\n");
query.append("SET extension_directory = " + query.quoteText(DUCKDB_EXTENSION_INSTALLATION_DIRECTORY) + ";\n");
query.append("INSTALL postgres;\n");

for (int connectionIndex = 0; connectionIndex < numberOfPods; connectionIndex++) {
ConnectionAttribute c = properties.getConnectionProperties().get(connectionIndex);

String connexionChain = "dbname=" + c.getDatabase() + " user=" + c.getDatabaseUsername() + " port="
+ c.getPort() + " password=" + c.getDatabasePassword() + " host=" + c.getHost();

System.out.println("§§§§§§§§§§§");
System.out.println(connexionChain);

query.append("ATTACH " + query.quoteText(connexionChain) + " AS " + attachmentName(connectionIndex)
+ " (TYPE postgres, READ_ONLY);\n");
Expand All @@ -132,6 +133,35 @@ private static void attachPostgresDatabasesToDuckdb(Connection connection, Parqu

}

private static void unzipDuckdbPostgresExtensions() throws IOException {
try (InputStream is = ParquetDao.class.getClassLoader().getResourceAsStream(DUCKDB_EXTENSION_PROVIDED_FILE)) {
try (ZipArchiveInputStream zis = new ZipArchiveInputStream(is)) {
ZipArchiveEntry zae = zis.getNextEntry();
while (zae != null) {

// if already uncompressed, try next entry
if (new File(DUCKDB_EXTENSION_INSTALLATION_DIRECTORY + zae).exists()) {
zae = zis.getNextEntry();
continue;
}

if (zae.isDirectory()) {
FileUtilsArc.createDirIfNotexist(DUCKDB_EXTENSION_INSTALLATION_DIRECTORY + zae);
} else {
try (FileOutputStream fos = new FileOutputStream(DUCKDB_EXTENSION_INSTALLATION_DIRECTORY + zae)) {
byte[] buffer = new byte[CompressedUtils.READ_BUFFER_SIZE];
int len;
while ((len = zis.read(buffer)) > 0) {
fos.write(buffer, 0, len);
}
}
}
zae = zis.getNextEntry();
}
}
}
}

private static void executeQuery(Connection connection, GenericPreparedStatementBuilder query) throws SQLException {
try (PreparedStatement stmt = connection.prepareStatement(query.getQueryWithParameters())) {
stmt.execute();
Expand Down

0 comments on commit fc79ca6

Please sign in to comment.