-
Notifications
You must be signed in to change notification settings - Fork 111
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
CQL Vector support #1165
base: main
Are you sure you want to change the base?
CQL Vector support #1165
Changes from 2 commits
08c322d
89f6109
2a158c6
3d80583
cf57346
440d63a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -454,6 +454,8 @@ pub enum CqlTypeParseError { | |
TupleLengthParseError(LowLevelDeserializationError), | ||
#[error("CQL Type not yet implemented, id: {0}")] | ||
TypeNotImplemented(u16), | ||
#[error("Failed to parse abstract type")] | ||
AbstractTypeParseError(), | ||
Comment on lines
+457
to
+458
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
#[error("Failed to parse custom CQL type: {0}")]
CustomTypeParseError(#[from] CustomTypeParseError) cc: @wprzytula There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: it's idiomatic to avoid the parentheses if the list of arguments for the variant is empty |
||
} | ||
|
||
/// A low level deserialization error. | ||
|
@@ -485,6 +487,8 @@ pub enum LowLevelDeserializationError { | |
InvalidInetLength(u8), | ||
#[error("UTF8 deserialization failed: {0}")] | ||
UTF8DeserializationError(#[from] std::str::Utf8Error), | ||
#[error(transparent)] | ||
ParseIntError(#[from] std::num::ParseIntError), | ||
Comment on lines
487
to
+491
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe this is some remnant from one of your initial implementations. It's not needed anymore AFAIU (I deleted it locally and everything compiled). |
||
} | ||
|
||
impl From<std::io::Error> for LowLevelDeserializationError { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
use super::type_parser; | ||
#[allow(deprecated)] | ||
use crate::cql_to_rust::{FromRow, FromRowError}; | ||
use crate::frame::frame_errors::{ | ||
|
@@ -16,7 +17,8 @@ use crate::frame::value::{ | |
use crate::types::deserialize::result::{RawRowIterator, TypedRowIterator}; | ||
use crate::types::deserialize::row::DeserializeRow; | ||
use crate::types::deserialize::value::{ | ||
mk_deser_err, BuiltinDeserializationErrorKind, DeserializeValue, MapIterator, UdtIterator, | ||
mk_deser_err, BuiltinDeserializationErrorKind, ConstLengthVectorIterator, DeserializeValue, | ||
MapIterator, UdtIterator, VariableLengthVectorIterator, | ||
Comment on lines
+20
to
+21
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🔧 Commit |
||
}; | ||
use crate::types::deserialize::{DeserializationError, FrameSlice, TypeCheckError}; | ||
use bytes::{Buf, Bytes}; | ||
|
@@ -81,6 +83,7 @@ pub enum ColumnType<'frame> { | |
Tuple(Vec<ColumnType<'frame>>), | ||
Uuid, | ||
Varint, | ||
Vector(Box<ColumnType<'frame>>, u32), | ||
} | ||
|
||
impl ColumnType<'_> { | ||
|
@@ -128,6 +131,43 @@ impl ColumnType<'_> { | |
} | ||
ColumnType::Uuid => ColumnType::Uuid, | ||
ColumnType::Varint => ColumnType::Varint, | ||
ColumnType::Vector(elem_type, dim) => { | ||
ColumnType::Vector(Box::new(elem_type.into_owned()), dim) | ||
} | ||
} | ||
} | ||
|
||
pub fn type_size(&self) -> Option<usize> { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a public method with unclear meaning, please add a docstring. |
||
match self { | ||
ColumnType::Custom(_) => None, | ||
ColumnType::Ascii => None, | ||
ColumnType::Boolean => Some(1), | ||
ColumnType::Blob => None, | ||
ColumnType::Counter => None, | ||
ColumnType::Date => Some(8), | ||
ColumnType::Decimal => None, | ||
ColumnType::Double => Some(8), | ||
ColumnType::Duration => None, | ||
ColumnType::Float => Some(4), | ||
ColumnType::Int => Some(4), | ||
ColumnType::BigInt => Some(8), | ||
ColumnType::Text => None, | ||
ColumnType::Timestamp => Some(8), | ||
ColumnType::Inet => None, | ||
ColumnType::List(_) => None, | ||
ColumnType::Map(_, _) => None, | ||
ColumnType::Set(_) => None, | ||
ColumnType::UserDefinedType { .. } => None, | ||
// Note that although SmallInt and TinyInt is of a fixed size, | ||
// Cassandra (erroneously) treats it as a variable-size | ||
ColumnType::SmallInt => None, | ||
ColumnType::TinyInt => None, | ||
Comment on lines
+161
to
+164
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 💭 Perhaps we should file an issue about this, or even submit a fix to Cassandra instead of replicating the bug in ScyllaDB? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't understand what this function is supposed to return, and why we return a |
||
ColumnType::Time => Some(8), | ||
ColumnType::Timeuuid => Some(16), | ||
ColumnType::Tuple(_) => None, | ||
ColumnType::Uuid => Some(16), | ||
ColumnType::Varint => None, | ||
ColumnType::Vector(_, _) => None, | ||
} | ||
} | ||
} | ||
|
@@ -171,6 +211,7 @@ pub enum CqlValue { | |
Tuple(Vec<Option<CqlValue>>), | ||
Uuid(Uuid), | ||
Varint(CqlVarint), | ||
Vector(Vec<CqlValue>), | ||
} | ||
|
||
impl<'a> TableSpec<'a> { | ||
|
@@ -438,10 +479,18 @@ impl CqlValue { | |
} | ||
} | ||
|
||
pub fn as_vector(&self) -> Option<&Vec<CqlValue>> { | ||
match self { | ||
Self::Vector(s) => Some(s), | ||
_ => None, | ||
} | ||
} | ||
|
||
pub fn into_vec(self) -> Option<Vec<CqlValue>> { | ||
match self { | ||
Self::List(s) => Some(s), | ||
Self::Set(s) => Some(s), | ||
Self::Vector(s) => Some(s), | ||
_ => None, | ||
} | ||
} | ||
|
@@ -864,17 +913,12 @@ fn deser_type_generic<'frame, 'result, StrT: Into<Cow<'result, str>>>( | |
types::read_short(buf).map_err(|err| CqlTypeParseError::TypeIdParseError(err.into()))?; | ||
Ok(match id { | ||
0x0000 => { | ||
// We use types::read_string instead of read_string argument here on purpose. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's a bit sad that Cassandra folks didn't bother to add proper support for expressing the vector type in the protocol, instead relying on the custom type... |
||
// Chances are the underlying string is `...DurationType`, in which case | ||
// we don't need to allocate it at all. Only for Custom types | ||
// (which we don't support anyway) do we need to allocate. | ||
// OTOH, the provided `read_string` function deserializes borrowed OR owned string; | ||
// here we want to always deserialize borrowed string. | ||
let type_str = | ||
types::read_string(buf).map_err(CqlTypeParseError::CustomTypeNameParseError)?; | ||
Comment on lines
-867
to
-874
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ❓ Why is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I noticed it as well. However, when you simply revert it back to Another observation: why does |
||
match type_str { | ||
"org.apache.cassandra.db.marshal.DurationType" => Duration, | ||
_ => Custom(type_str.to_owned().into()), | ||
let type_str = read_string(buf).map_err(CqlTypeParseError::CustomTypeNameParseError)?; | ||
let type_cow: Cow<'result, str> = type_str.into(); | ||
if let Ok(typ) = type_parser::TypeParser::parse(type_cow.clone()) { | ||
typ | ||
} else { | ||
Custom(type_cow) | ||
} | ||
} | ||
0x0001 => Ascii, | ||
|
@@ -1405,6 +1449,16 @@ pub fn deser_cql_value( | |
.collect::<StdResult<_, _>>()?; | ||
CqlValue::Tuple(t) | ||
} | ||
Vector(elem_type, _dimensions) if elem_type.type_size().is_some() => { | ||
let v = ConstLengthVectorIterator::<'_, '_, CqlValue>::deserialize(typ, v)?; | ||
let v: Vec<CqlValue> = v.collect::<StdResult<_, _>>()?; | ||
CqlValue::Vector(v) | ||
} | ||
Vector(_, _) => { | ||
let v = VariableLengthVectorIterator::<'_, '_, CqlValue>::deserialize(typ, v)?; | ||
let v: Vec<CqlValue> = v.collect::<StdResult<_, _>>()?; | ||
CqlValue::Vector(v) | ||
} | ||
Comment on lines
+1452
to
+1461
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🔧 IIUC, this should be added only after Vector deserialization is implemented. |
||
}) | ||
} | ||
|
||
|
@@ -1525,6 +1579,7 @@ mod test_utils { | |
Self::Set(_) => 0x0022, | ||
Self::UserDefinedType { .. } => 0x0030, | ||
Self::Tuple(_) => 0x0031, | ||
Self::Vector(_, _) => 0x0000, | ||
} | ||
} | ||
|
||
|
@@ -1563,6 +1618,10 @@ mod test_utils { | |
ColumnType::List(elem_type) | ColumnType::Set(elem_type) => { | ||
elem_type.serialize(buf)?; | ||
} | ||
ColumnType::Vector(elem_type, dimensions) => { | ||
elem_type.serialize(buf)?; | ||
types::write_short_length(*dimensions as usize, buf)?; | ||
} | ||
ColumnType::Map(key_type, value_type) => { | ||
key_type.serialize(buf)?; | ||
value_type.serialize(buf)?; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🔧 Commit
frame: added a parser for Java-class type names
:There must be an empty line after the commit name. Otherwise, git considers the subsequent message continuation of the commit name and displays it in a strange way.