support allowDecimalPrecisionLoss
Signed-off-by: Yuan Zhou <[email protected]>
zhouyuan committed May 6, 2024
1 parent 9e2ec55 commit 67fa019
Showing 6 changed files with 33 additions and 13 deletions.
2 changes: 2 additions & 0 deletions cpp/core/config/GlutenConfig.h
@@ -34,6 +34,8 @@ const std::string kLegacySize = "spark.sql.legacy.sizeOfNull";

const std::string kSessionTimezone = "spark.sql.session.timeZone";

+ const std::string kAllowPrecisionLoss = "spark.sql.decimalOperations.allowPrecisionLoss";
+
const std::string kIgnoreMissingFiles = "spark.sql.files.ignoreMissingFiles";

const std::string kDefaultSessionTimezone = "spark.gluten.sql.session.timeZone.default";
2 changes: 2 additions & 0 deletions cpp/velox/compute/WholeStageResultIterator.cc
@@ -490,6 +490,8 @@ std::unordered_map<std::string, std::string> WholeStageResultIterator::getQueryC
}
// Adjust timestamp according to the above configured session timezone.
configs[velox::core::QueryConfig::kAdjustTimestampToTimezone] = "true";
+ // Align with Spark's behavior: whether decimal arithmetic may lose precision.
+ configs[velox::core::QueryConfig::kAllowPrecisionLoss] = veloxCfg_->get<std::string>(kAllowPrecisionLoss, "true");
// Align Velox size function with Spark.
configs[velox::core::QueryConfig::kSparkLegacySizeOfNull] = std::to_string(veloxCfg_->get<bool>(kLegacySize, true));

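The new entry reads Spark's existing spark.sql.decimalOperations.allowPrecisionLoss flag (default "true") and forwards it into Velox's query config. A minimal usage sketch in Scala, assuming a session running on the Gluten + Velox backend (the query itself is made up for illustration):

    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder()
      .appName("decimal-precision-loss-demo")
      .getOrCreate()

    // Default "true": the planner may trim scale so results still fit DECIMAL(38, _).
    spark.conf.set("spark.sql.decimalOperations.allowPrecisionLoss", "false")

    // 38-10+2 = 30 integer digits plus max(6, 10+10+1) = 21 requested fraction
    // digits exceed 38, so one side must shrink; which side gives way depends on the flag.
    spark.sql("SELECT CAST(1 AS DECIMAL(38, 10)) / CAST(3 AS DECIMAL(10, 2))").printSchema()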
4 changes: 2 additions & 2 deletions ep/build-velox/src/get_velox.sh
@@ -16,8 +16,8 @@

set -exu

- VELOX_REPO=https://github.com/oap-project/velox.git
- VELOX_BRANCH=2024_05_06
+ VELOX_REPO=https://github.com/zhouyuan/velox.git
+ VELOX_BRANCH=wip_decimal_precision_loss
VELOX_HOME=""

# Set when running Gluten on HDFS
8 changes: 0 additions & 8 deletions ExpressionConverter.scala
@@ -511,14 +511,6 @@ object ExpressionConverter extends SQLConfHelper with Logging {
replaceWithExpressionTransformerInternal(_, attributeSeq, expressionsMap)),
expr)
case b: BinaryArithmetic if DecimalArithmeticUtil.isDecimalArithmetic(b) =>
-       // PrecisionLoss=true: velox support / ch not support
-       // PrecisionLoss=false: velox not support / ch support
-       // TODO ch support PrecisionLoss=true
-       if (!BackendsApiManager.getSettings.allowDecimalArithmetic) {
-         throw new GlutenNotSupportException(
-           s"Not support ${SQLConf.DECIMAL_OPERATIONS_ALLOW_PREC_LOSS.key} " +
-             s"${conf.decimalOperationsAllowPrecisionLoss} mode")
-       }
val rescaleBinary = if (BackendsApiManager.getSettings.rescaleDecimalLiteral) {
DecimalArithmeticUtil.rescaleLiteral(b)
} else {
29 changes: 26 additions & 3 deletions DecimalArithmeticUtil.scala
@@ -22,6 +22,7 @@ import org.apache.gluten.expression.{CheckOverflowTransformer, ChildTransformer,

import org.apache.spark.sql.catalyst.analysis.DecimalPrecision
import org.apache.spark.sql.catalyst.expressions.{Add, BinaryArithmetic, Cast, Divide, Expression, Literal, Multiply, Pmod, PromotePrecision, Remainder, Subtract}
+ import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.{ByteType, Decimal, DecimalType, IntegerType, LongType, ShortType}

object DecimalArithmeticUtil {
@@ -33,12 +34,14 @@

val MIN_ADJUSTED_SCALE = 6
val MAX_PRECISION = 38
+ val MAX_SCALE = 38

// Returns the result decimal type of a decimal arithmetic operation.
def getResultTypeForOperation(
operationType: OperationType.Config,
type1: DecimalType,
type2: DecimalType): DecimalType = {
+ val allowPrecisionLoss = SQLConf.get.decimalOperationsAllowPrecisionLoss
var resultScale = 0
var resultPrecision = 0
operationType match {
@@ -54,16 +57,32 @@
resultScale = type1.scale + type2.scale
resultPrecision = type1.precision + type2.precision + 1
case OperationType.DIVIDE =>
-       resultScale = Math.max(MIN_ADJUSTED_SCALE, type1.scale + type2.precision + 1)
-       resultPrecision = type1.precision - type1.scale + type2.scale + resultScale
+       if (allowPrecisionLoss) {
+         resultScale = Math.max(MIN_ADJUSTED_SCALE, type1.scale + type2.precision + 1)
+         resultPrecision = type1.precision - type1.scale + type2.scale + resultScale
+       } else {
+         var intDig = Math.min(MAX_SCALE, type1.precision - type1.scale + type2.scale)
+         var decDig = Math.min(MAX_SCALE, Math.max(6, type1.scale + type2.precision + 1))
+         val diff = (intDig + decDig) - MAX_SCALE
+         if (diff > 0) {
+           decDig -= diff / 2 + 1
+           intDig = MAX_SCALE - decDig
+         }
+         resultPrecision = intDig + decDig
+         resultScale = decDig
+       }
case OperationType.MOD =>
resultScale = Math.max(type1.scale, type2.scale)
        resultPrecision =
          Math.min(type1.precision - type1.scale, type2.precision - type2.scale) + resultScale
case other =>
throw new GlutenNotSupportException(s"$other is not supported.")
}
-     adjustScaleIfNeeded(resultPrecision, resultScale)
+     if (allowPrecisionLoss) {
+       adjustScaleIfNeeded(resultPrecision, resultScale)
+     } else {
+       bounded(resultPrecision, resultScale)
+     }
}

// Returns the adjusted decimal type when the precision is larger than the maximum.
@@ -79,6 +98,10 @@
DecimalType(typePrecision, typeScale)
}

+   def bounded(precision: Int, scale: Int): DecimalType = {
+     DecimalType(Math.min(precision, MAX_PRECISION), Math.min(scale, MAX_SCALE))
+   }
+
// If casting between DecimalType, unnecessary cast is skipped to avoid data loss,
// because argument input type of "cast" is actually the res type of "+-*/".
// Cast will use a wider input type, then calculates result type with less scale than expected.
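To make the two DIVIDE branches concrete, here is a self-contained Scala sketch that replays both computations for DecimalType(38, 10) / DecimalType(10, 2). The constants and formulas mirror the patch; the precision-loss branch also replays the scale reduction performed by adjustScaleIfNeeded, assuming it follows Spark's DecimalType.adjustPrecisionScale:

    object DivideResultTypeDemo {
      val MIN_ADJUSTED_SCALE = 6
      val MAX_PRECISION = 38
      val MAX_SCALE = 38

      def main(args: Array[String]): Unit = {
        val (p1, s1, p2, s2) = (38, 10, 10, 2)

        // allowPrecisionLoss = true
        val scale = math.max(MIN_ADJUSTED_SCALE, s1 + p2 + 1)             // max(6, 21) = 21
        val prec = p1 - s1 + s2 + scale                                   // 30 + 21 = 51
        // 51 > 38, so the scale is cut back to preserve all 30 integer digits:
        val intDigits = prec - scale                                      // 30
        val adjScale = math.max(MAX_PRECISION - intDigits, math.min(scale, MIN_ADJUSTED_SCALE))
        println(s"loss allowed:    DECIMAL($MAX_PRECISION, $adjScale)")   // DECIMAL(38, 8)

        // allowPrecisionLoss = false: split the 38 digits between the two sides.
        var intDig = math.min(MAX_SCALE, p1 - s1 + s2)                    // 30
        var decDig = math.min(MAX_SCALE, math.max(6, s1 + p2 + 1))        // 21
        val diff = (intDig + decDig) - MAX_SCALE                          // 13
        if (diff > 0) {
          decDig -= diff / 2 + 1                                          // 21 - 7 = 14
          intDig = MAX_SCALE - decDig                                     // 24
        }
        println(s"loss disallowed: DECIMAL(${intDig + decDig}, $decDig)") // DECIMAL(38, 14)
      }
    }

Note how disallowing precision loss keeps more fraction digits (14 vs. 8) but leaves fewer integer digits, so large quotients can overflow instead of being rounded.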
1 change: 1 addition & 0 deletions GlutenConfig.scala
@@ -571,6 +571,7 @@
GLUTEN_DEFAULT_SESSION_TIMEZONE_KEY,
SQLConf.LEGACY_SIZE_OF_NULL.key,
"spark.io.compression.codec",
"spark.sql.decimalOperations.allowPrecisionLoss",
COLUMNAR_VELOX_BLOOM_FILTER_EXPECTED_NUM_ITEMS.key,
COLUMNAR_VELOX_BLOOM_FILTER_NUM_BITS.key,
COLUMNAR_VELOX_BLOOM_FILTER_MAX_NUM_BITS.key,
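Because the key is now whitelisted, a per-session value reaches the native engine and both sides agree on result types. A hedged behavioral sketch of the visible difference (the literals are made up; assumes a SparkSession in scope as spark and the default non-ANSI overflow handling, where a value that no longer fits becomes null):

    // Multiplying two DECIMAL(38, 28) values: the exact result needs DECIMAL(77, 56).
    val product = spark.sql(
      """SELECT CAST('1.2345678901234567890123456789' AS DECIMAL(38, 28)) *
        |       CAST('1.2345678901234567890123456789' AS DECIMAL(38, 28)) AS product""".stripMargin)
    product.printSchema()
    // allowPrecisionLoss=true  -> product: DECIMAL(38, 17); the value fits and is rounded.
    // allowPrecisionLoss=false -> product: DECIMAL(38, 38); no integer digit remains,
    //                             so the result overflows to null.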
