Skip to content

Commit

Permalink
step
Browse files Browse the repository at this point in the history
  • Loading branch information
ddecrulle committed Jan 6, 2025
1 parent 8fae9a8 commit 5d0c9ef
Show file tree
Hide file tree
Showing 8 changed files with 69 additions and 54 deletions.
Binary file not shown.
37 changes: 21 additions & 16 deletions web/src/core/adapters/sqlOlap.ts
Original file line number Diff line number Diff line change
Expand Up @@ -105,16 +105,23 @@ export const createDuckDbSqlOlap = (params: {
return db;
};
})(),
getRows: async ({ sourceUrl, rowsPerPage, page }) => {
getRows: async ({ sourceUrl, fileType, rowsPerPage, page }) => {
const db = await sqlOlap.getConfiguredAsyncDuckDb();

const conn = await db.connect();

const stmt = await conn.prepare(
`SELECT * FROM "${sourceUrl}" LIMIT ${rowsPerPage} OFFSET ${
rowsPerPage * (page - 1)
}`
);
const sqlQuery = `SELECT * FROM ${(() => {
switch (fileType) {
case "csv":
return `read_csv('${sourceUrl}')`;
case "parquet":
return `read_parquet('${sourceUrl}')`;
case "json":
return `read_json('${sourceUrl}')`;
}
})()} LIMIT ${rowsPerPage} OFFSET ${rowsPerPage * (page - 1)}`;

const stmt = await conn.prepare(sqlQuery);

const res = await stmt.query();

Expand All @@ -139,24 +146,22 @@ export const createDuckDbSqlOlap = (params: {
return rows;
},
getRowCount: memoize(
async sourceUrl => {
if (!new URL(sourceUrl).pathname.endsWith(".parquet")) {
async ({ sourceUrl, fileType }) => {
if (fileType !== "parquet") {
return undefined;
}

const db = await sqlOlap.getConfiguredAsyncDuckDb();

const conn = await db.connect();

const stmt = await conn.prepare(
`SELECT count(*)::INTEGER as v FROM "${sourceUrl}";`
);

const res = await stmt.query();

const count: number = JSON.parse(JSON.stringify(res.toArray()))[0]["v"];
const query = `SELECT count(*)::INTEGER as v FROM read_parquet("${sourceUrl}");`;

return count;
return conn
.prepare(query)
.then(stmt => stmt.query())
.then(res => res.toArray()[0]["v"])
.finally(() => conn.close());
},
{ promise: true, max: 1 }
)
Expand Down
6 changes: 5 additions & 1 deletion web/src/core/ports/SqlOlap.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@ import type { AsyncDuckDB } from "@duckdb/duckdb-wasm";

export type SqlOlap = {
getConfiguredAsyncDuckDb: () => Promise<AsyncDuckDB>;
getRowCount: (sourceUrl: string) => Promise<number | undefined>;
getRowCount: (params: {
sourceUrl: string;
fileType: "parquet" | "csv" | "json";
}) => Promise<number | undefined>;
getRows: (params: {
sourceUrl: string;
fileType: "parquet" | "csv" | "json";
rowsPerPage: number;
page: number;
}) => Promise<any[]>;
Expand Down
2 changes: 0 additions & 2 deletions web/src/core/usecases/dataExplorer/selectors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,6 @@ const main = createSelector(state, columns, (state, columns) => {
isQuerying,
rows: undefined
};
case "unknownFileType":
return { isQuerying, queryParams, shouldAskFileType: true };
case "loaded": {
assert(columns !== undefined);
assert(queryParams !== undefined);
Expand Down
39 changes: 20 additions & 19 deletions web/src/core/usecases/dataExplorer/state.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ export type State = {
fileDownloadUrl: string;
fileType: "parquet" | "csv" | "json";
}
| { state: "unknownFileType"; fileType: undefined; fileDownloadUrl: string }
| { state: "empty" };
// | { state: "unknownFileType"; fileType: undefined; fileDownloadUrl: string }
};

export const { actions, reducer } = createUsecaseActions({
Expand Down Expand Up @@ -120,24 +120,24 @@ export const { actions, reducer } = createUsecaseActions({
};
},
// TODO: rename this; it ends the query when the file type cannot be auto-detected
terminateQueryDueToUnknownFileType: (
state,
{
payload
}: {
payload: {
fileDownloadUrl: string;
};
}
) => {
const { fileDownloadUrl } = payload;
state.isQuerying = false;
state.data = {
state: "unknownFileType",
fileDownloadUrl,
fileType: undefined
};
},
// terminateQueryDueToUnknownFileType: (
// state,
// {
// payload
// }: {
// payload: {
// fileDownloadUrl: string;
// };
// }
// ) => {
// const { fileDownloadUrl } = payload;
// state.isQuerying = false;
// state.data = {
// state: "unknownFileType",
// fileDownloadUrl,
// fileType: undefined
// };
// },
queryCanceled: state => {
state.isQuerying = false;
state.queryParams = undefined;
Expand All @@ -151,6 +151,7 @@ export const { actions, reducer } = createUsecaseActions({
state.queryParams = undefined;
state.extraRestorableStates = undefined;
state.data = { state: "empty" };
state.errorMessage = undefined;
}
}
});
28 changes: 17 additions & 11 deletions web/src/core/usecases/dataExplorer/thunks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -83,17 +83,20 @@ const privateThunks = {
fileDownloadUrl: data.fileDownloadUrl
};
}
const toto = await dispatch(
privateThunks.detectFileType({ sourceUrl })
);
console.log(toto);
return toto;
return dispatch(privateThunks.detectFileType({ sourceUrl }));
})();

if (fileType === undefined) {
// dispatch(
// actions.terminateQueryDueToUnknownFileType({
// fileDownloadUrl: fileDownloadUrlOrUndefined
// })
// );
dispatch(
actions.terminateQueryDueToUnknownFileType({
fileDownloadUrl: fileDownloadUrlOrUndefined
actions.queryFailed({
//TODO Improve
errorMessage:
"Unable to detect the file type, we support only parquet, csv and json."
})
);
return;
Expand All @@ -114,7 +117,7 @@ const privateThunks = {
}

const rowCountOrErrorMessage = await sqlOlap
.getRowCount(sourceUrl)
.getRowCount({ sourceUrl, fileType })
.catch(error => String(error));

return rowCountOrErrorMessage;
Expand All @@ -140,7 +143,8 @@ const privateThunks = {
.getRows({
sourceUrl,
rowsPerPage: rowsPerPage + 1,
page
page,
fileType
})
.catch(error => String(error));

Expand Down Expand Up @@ -223,6 +227,7 @@ const privateThunks = {
return undefined;
}

// TODO: consider rejecting some content types up front and stopping the detection early
const contentTypeToExtension = [
{
keyword: "application/parquet" as const,
Expand Down Expand Up @@ -272,8 +277,9 @@ const privateThunks = {
const fileContent = new TextDecoder().decode(bytes);
return (
fileContent.includes(",") ||
fileContent.includes("\n") ||
fileContent.includes(";")
fileContent.includes("|") ||
fileContent.includes(";") ||
fileContent.includes("\t")
); // CSV heuristic
},
extension: "csv" as const
Expand Down
5 changes: 1 addition & 4 deletions web/src/ui/pages/dataExplorer/DataExplorer.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,7 @@ export default function DataExplorer(props: Props) {
columns,
rowCount,
errorMessage,
isQuerying,
shouldAskFileType
isQuerying
} = useCoreState("dataExplorer", "main");

useEffect(() => {
Expand Down Expand Up @@ -89,7 +88,6 @@ export default function DataExplorer(props: Props) {

const { classes, cx } = useStyles();

console.log("core props", { rows, queryParams, errorMessage, shouldAskFileType });
// There's a bug in MUI: classes.panel is not applied, so we have to apply the class manually
const { childrenClassName: dataGridPanelWrapperRefClassName } =
useApplyClassNameToParent({
Expand Down Expand Up @@ -136,7 +134,6 @@ export default function DataExplorer(props: Props) {
<div className={classes.mainArea}>
{(() => {
if (errorMessage !== undefined) {
console.log(queryParams);
return (
<Alert className={classes.errorAlert} severity="error">
{errorMessage}
Expand Down
6 changes: 5 additions & 1 deletion web/src/ui/pages/myFiles/MyFiles.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,11 @@ function MyFiles(props: Props) {

const onOpenFile = useConstCallback<ExplorerProps["onOpenFile"]>(({ basename }) => {
//TODO use dataExplorer thunk
if (basename.endsWith(".parquet") || basename.endsWith(".csv")) {
if (
basename.endsWith(".parquet") ||
basename.endsWith(".csv") ||
basename.endsWith(".json")
) {
const { path } = route.params;

assert(path !== undefined);
Expand Down

0 comments on commit 5d0c9ef

Please sign in to comment.