feat: Reputation: CNS-1004 - QoS excellence epoch score aggregation #1612

Open
wants to merge 14 commits into base: CNS-1003-reputation-proto-definitions
Changes from all commits
7 changes: 7 additions & 0 deletions proto/lavanet/lava/pairing/relay.proto
@@ -95,16 +95,23 @@ message RelayReply {
}

message QualityOfServiceReport{
// Latency of provider answers in milliseconds, range 0-inf, lower is better
string latency = 1 [
(gogoproto.moretags) = "yaml:\"Latency\"",
(gogoproto.customtype) = "github.com/cosmos/cosmos-sdk/types.Dec",
(gogoproto.nullable) = false
];

// Fraction of the time the provider returned a non-error response, range 0-1, higher is better
string availability = 2 [
(gogoproto.moretags) = "yaml:\"availability\"",
(gogoproto.customtype) = "github.com/cosmos/cosmos-sdk/types.Dec",
(gogoproto.nullable) = false
];

// Amount of time the provider has not been synced (i.e., does not have the latest block), in milliseconds; range 0-inf, lower is better.
// Example: ETH has a 15-second block time, so sync = 15000 means that the provider is one block
// behind the actual latest block.
string sync = 3 [
(gogoproto.moretags) = "yaml:\"sync\"",
(gogoproto.customtype) = "github.com/cosmos/cosmos-sdk/types.Dec",
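For reference, this is how a report matching the field comments above is populated on the Go side elsewhere in this PR (a minimal sketch; the values are illustrative only):

// a provider that answered in 200ms, never errored, and is one ETH block (~15s) behind the head
qos := &types.QualityOfServiceReport{
Latency:      sdk.NewDec(200),   // milliseconds
Availability: sdk.OneDec(),      // 1.0 = every response succeeded
Sync:         sdk.NewDec(15000), // milliseconds behind the latest block
}
_ = qos
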
9 changes: 9 additions & 0 deletions testutil/common/tester.go
@@ -1049,6 +1049,15 @@ func (ts *Tester) GetNextMonth(from time.Time) int64 {
return utils.NextMonth(from).UTC().Unix()
}

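// BlockTimeDefault returns the default block time used in tests, derived from the downtime module's DowntimeDuration param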
func (ts *Tester) BlockTimeDefault() time.Duration {
return ts.Keepers.Downtime.GetParams(ts.Ctx).DowntimeDuration
}

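// EpochTimeDefault returns the approximate duration of a single epoch: the default block time multiplied by the epoch's block count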
func (ts *Tester) EpochTimeDefault() time.Duration {
epochBlocks := ts.Keepers.Epochstorage.GetParams(ts.Ctx).EpochBlocks
return ts.BlockTimeDefault() * time.Duration(epochBlocks)
}

func (ts *Tester) AdvanceToBlock(block uint64) {
if block < ts.BlockHeight() {
panic("AdvanceToBlock: block in the past: " +
10 changes: 10 additions & 0 deletions utils/convert.go
@@ -0,0 +1,10 @@
package utils

import "math"

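// SafeUint64ToInt64Convert converts a uint64 to an int64, clamping values above math.MaxInt64 instead of overflowing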
func SafeUint64ToInt64Convert(val uint64) int64 {
if val > math.MaxInt64 {
val = math.MaxInt64
}
return int64(val)
}
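A minimal usage sketch of the helper (assuming the package is imported as github.com/lavanet/lava/v3/utils, as elsewhere in this PR; values are only to illustrate the clamping):

package main

import (
"fmt"
"math"

"github.com/lavanet/lava/v3/utils"
)

func main() {
fmt.Println(utils.SafeUint64ToInt64Convert(42))                     // 42
fmt.Println(utils.SafeUint64ToInt64Convert(uint64(math.MaxUint64))) // clamped to math.MaxInt64
}
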
40 changes: 40 additions & 0 deletions x/pairing/keeper/msg_server_relay_payment.go
@@ -170,6 +170,46 @@ func (k msgServer) RelayPayment(goCtx context.Context, msg *types.MsgRelayPayment
k.handleBadgeCu(ctx, badgeData, relay.Provider, relay.CuSum, newBadgeTimerExpiry)
}

// update the reputation's epoch QoS score
// the excellence QoS report can be nil when the provider and consumer geolocations are not equal
if relay.QosExcellenceReport != nil {
sub, found := k.subscriptionKeeper.GetSubscription(ctx, project.Subscription)
if !found {
return nil, utils.LavaFormatError("RelayPayment: could not get cluster for reputation score update", fmt.Errorf("relay consumer's subscription not found"),
utils.LogAttr("consumer", clientAddr),
utils.LogAttr("project", project.Index),
utils.LogAttr("subscription", project.Subscription),
utils.LogAttr("chain", relay.SpecId),
utils.LogAttr("provider", relay.Provider),
)
}

syncFactor := k.ReputationLatencyOverSyncFactor(ctx)
score, err := relay.QosExcellenceReport.ComputeQosExcellenceForReputation(syncFactor)
if err != nil {
return nil, utils.LavaFormatWarning("RelayPayment: could not compute qos excellence score", err,
utils.LogAttr("consumer", clientAddr),
utils.LogAttr("chain", relay.SpecId),
utils.LogAttr("provider", relay.Provider),
utils.LogAttr("qos_excellence_report", relay.QosExcellenceReport.String()),
utils.LogAttr("sync_factor", syncFactor.String()),
)
}

stakeEntry, found := k.epochStorageKeeper.GetStakeEntryCurrent(ctx, relay.SpecId, relay.Provider)
if !found {
return nil, utils.LavaFormatWarning("RelayPayment: could not get stake entry for reputation", fmt.Errorf("stake entry not found"),
utils.LogAttr("consumer", clientAddr),
utils.LogAttr("chain", relay.SpecId),
utils.LogAttr("provider", relay.Provider),
)
}
effectiveStake := sdk.NewCoin(stakeEntry.Stake.Denom, stakeEntry.EffectiveStake())

// note: the current weight used is the relay num; this might change in the future
k.UpdateReputationEpochQosScore(ctx, relay.SpecId, sub.Cluster, relay.Provider, score, utils.SafeUint64ToInt64Convert(relay.RelayNum), effectiveStake)
}

// TODO: add support for spec changes
spec, found := k.specKeeper.GetSpec(ctx, relay.SpecId)
if !found || !spec.Enabled {
194 changes: 194 additions & 0 deletions x/pairing/keeper/msg_server_relay_payment_test.go
@@ -1063,3 +1063,197 @@ func TestPairingCaching(t *testing.T) {
require.Equal(t, totalCU*3, sub.Sub.MonthCuTotal-sub.Sub.MonthCuLeft)
}
}

// TestUpdateReputationEpochQosScore tests the update of the reputation's epoch qos score
// Scenarios:
// 1. provider1 sends a relay -> its reputation is updated (epoch score and time last updated),
// while provider2's reputation is not updated
func TestUpdateReputationEpochQosScore(t *testing.T) {
ts := newTester(t)
ts.setupForPayments(2, 1, 0) // 2 providers, 1 client, default providers-to-pair

consumerAcc, consumer := ts.GetAccount(common.CONSUMER, 0)
_, provider1 := ts.GetAccount(common.PROVIDER, 0)
_, provider2 := ts.GetAccount(common.PROVIDER, 1)
qos := &types.QualityOfServiceReport{
Latency: sdk.ZeroDec(),
Availability: sdk.OneDec(),
Sync: sdk.ZeroDec(),
}

res, err := ts.QuerySubscriptionCurrent(consumer)
require.NoError(t, err)
cluster := res.Sub.Cluster

// set default reputations for both providers. Advance epoch to change the current block time
ts.Keepers.Pairing.SetReputation(ts.Ctx, ts.spec.Index, cluster, provider1, types.NewReputation(ts.Ctx))
ts.Keepers.Pairing.SetReputation(ts.Ctx, ts.spec.Index, cluster, provider2, types.NewReputation(ts.Ctx))
ts.AdvanceEpoch()

// send relay payment msg from provider1
relaySession := ts.newRelaySession(provider1, 0, 100, ts.BlockHeight(), 1)
relaySession.QosExcellenceReport = qos
sig, err := sigs.Sign(consumerAcc.SK, *relaySession)
require.NoError(t, err)
relaySession.Sig = sig

payment := types.MsgRelayPayment{
Creator: provider1,
Relays: []*types.RelaySession{relaySession},
}
ts.relayPaymentWithoutPay(payment, true)

// get both providers' reputations: provider1 should have its epoch score and time last updated changed,
// while provider2 should have nothing changed from the default
r1, found := ts.Keepers.Pairing.GetReputation(ts.Ctx, ts.spec.Index, cluster, provider1)
require.True(t, found)
r2, found := ts.Keepers.Pairing.GetReputation(ts.Ctx, ts.spec.Index, cluster, provider2)
require.True(t, found)

require.Greater(t, r1.TimeLastUpdated, r2.TimeLastUpdated)
epochScore1, err := r1.EpochScore.Score.Resolve()
require.NoError(t, err)
epochScore2, err := r2.EpochScore.Score.Resolve()
require.NoError(t, err)
variance1, err := r1.EpochScore.Variance.Resolve()
require.NoError(t, err)
variance2, err := r2.EpochScore.Variance.Resolve()
require.NoError(t, err)
require.True(t, epochScore1.LT(epochScore2)) // score is lower because QoS is excellent
require.True(t, variance1.GT(variance2)) // variance is higher because the QoS is significantly different from DefaultQos

entry, found := ts.Keepers.Epochstorage.GetStakeEntryByAddressCurrent(ts.Ctx, ts.spec.Index, provider1)
require.True(t, found)
require.True(t, entry.Stake.IsEqual(r1.Stake))
}

// TestUpdateReputationEpochQosScoreTruncation tests the following scenarios:
// 1. stabilization period has not passed -> no truncation
// 2. stabilization period passed -> with truncation (score update is smaller than the first one)
// Note: this test works because we use a bad QoS report (compared to the default), so we know that the score should
// increase (a higher score is considered worse)
func TestUpdateReputationEpochQosScoreTruncation(t *testing.T) {
// these will be used to compare the score change with/without truncation
scoreUpdates := []sdk.Dec{}

// we set the stabilization period relative to the epoch duration: advancing one epoch means we won't truncate,
// while advancing three means we will truncate.
epochsToAdvance := []uint64{1, 3}

for i := range epochsToAdvance {
ts := newTester(t)
ts.setupForPayments(1, 1, 0) // 1 provider, 1 client, default providers-to-pair

consumerAcc, consumer := ts.GetAccount(common.CONSUMER, 0)
_, provider1 := ts.GetAccount(common.PROVIDER, 0)
qos := &types.QualityOfServiceReport{
Latency: sdk.NewDec(1000),
Availability: sdk.OneDec(),
Sync: sdk.NewDec(1000),
}

resQCurrent, err := ts.QuerySubscriptionCurrent(consumer)
require.NoError(t, err)
cluster := resQCurrent.Sub.Cluster

// set the stabilization period based on the default epoch duration
resQParams, err := ts.Keepers.Pairing.Params(ts.GoCtx, &types.QueryParamsRequest{})
require.NoError(t, err)
resQParams.Params.ReputationVarianceStabilizationPeriod = int64(ts.EpochTimeDefault().Seconds())
ts.Keepers.Pairing.SetParams(ts.Ctx, resQParams.Params)

// set default reputation
ts.Keepers.Pairing.SetReputation(ts.Ctx, ts.spec.Index, cluster, provider1, types.NewReputation(ts.Ctx))

// advance epochs
ts.AdvanceEpochs(epochsToAdvance[i])

// send relay payment msg from provider1
relaySession := ts.newRelaySession(provider1, 0, 100, ts.BlockHeight(), 1)
relaySession.QosExcellenceReport = qos
sig, err := sigs.Sign(consumerAcc.SK, *relaySession)
require.NoError(t, err)
relaySession.Sig = sig

payment := types.MsgRelayPayment{
Creator: provider1,
Relays: []*types.RelaySession{relaySession},
}
ts.relayPaymentWithoutPay(payment, true)

// get update of epoch score
r, found := ts.Keepers.Pairing.GetReputation(ts.Ctx, ts.spec.Index, cluster, provider1)
require.True(t, found)
epochScore, err := r.EpochScore.Score.Resolve()
require.NoError(t, err)
defaultEpochScore, err := types.ZeroQosScore.Score.Resolve()
require.NoError(t, err)
scoreUpdates = append(scoreUpdates, epochScore.Sub(defaultEpochScore))
}

// require that the score update that was not truncated is larger than the one that was truncated
require.True(t, scoreUpdates[0].GT(scoreUpdates[1]))
}

// TestUpdateReputationEpochQosScoreRelayNumWeight tests the following scenario:
// 1. relay num is the reputation update weight: more relays = a bigger update
func TestUpdateReputationEpochQosScoreRelayNumWeight(t *testing.T) {
// these will be used to compare the score change with high/low relay numbers
scoreUpdates := []sdk.Dec{}

// relay numbers for the two iterations: a high relay count (100) vs. a single relay
relayNums := []uint64{100, 1}

for i := range relayNums {
ts := newTester(t)
ts.setupForPayments(1, 1, 0) // 1 provider, 1 client, default providers-to-pair

consumerAcc, consumer := ts.GetAccount(common.CONSUMER, 0)
_, provider1 := ts.GetAccount(common.PROVIDER, 0)
qos := &types.QualityOfServiceReport{
Latency: sdk.NewDec(1000),
Availability: sdk.OneDec(),
Sync: sdk.NewDec(1000),
}

resQCurrent, err := ts.QuerySubscriptionCurrent(consumer)
require.NoError(t, err)
cluster := resQCurrent.Sub.Cluster

// set the stabilization period based on the default epoch duration to avoid truncation
resQParams, err := ts.Keepers.Pairing.Params(ts.GoCtx, &types.QueryParamsRequest{})
require.NoError(t, err)
resQParams.Params.ReputationVarianceStabilizationPeriod = int64(ts.EpochTimeDefault().Seconds())
ts.Keepers.Pairing.SetParams(ts.Ctx, resQParams.Params)

// set default reputation
ts.Keepers.Pairing.SetReputation(ts.Ctx, ts.spec.Index, cluster, provider1, types.NewReputation(ts.Ctx))
ts.AdvanceEpoch()

// send relay payment msg from provider1
relaySession := ts.newRelaySession(provider1, 0, 100, ts.BlockHeight(), relayNums[i])
relaySession.QosExcellenceReport = qos
sig, err := sigs.Sign(consumerAcc.SK, *relaySession)
require.NoError(t, err)
relaySession.Sig = sig

payment := types.MsgRelayPayment{
Creator: provider1,
Relays: []*types.RelaySession{relaySession},
}
ts.relayPaymentWithoutPay(payment, true)

// get update of epoch score
r, found := ts.Keepers.Pairing.GetReputation(ts.Ctx, ts.spec.Index, cluster, provider1)
require.True(t, found)
epochScore, err := r.EpochScore.Score.Resolve()
require.NoError(t, err)
defaultEpochScore, err := types.ZeroQosScore.Score.Resolve()
require.NoError(t, err)
scoreUpdates = append(scoreUpdates, epochScore.Sub(defaultEpochScore))
}

// require that the score update with 100 relays is larger than the one with a single relay
require.True(t, scoreUpdates[0].GT(scoreUpdates[1]))
}
26 changes: 26 additions & 0 deletions x/pairing/keeper/reputation.go
@@ -92,6 +92,32 @@ func (k Keeper) GetAllReputation(ctx sdk.Context) []types.ReputationGenesis {
return entries
}

// UpdateReputationEpochQosScore updates the epoch QoS score of the provider's reputation using the score from the relay
// payment's QoS excellence report
func (k Keeper) UpdateReputationEpochQosScore(ctx sdk.Context, chainID string, cluster string, provider string, score math.LegacyDec, weight int64, stake sdk.Coin) {
// get current reputation and get parameters for the epoch score update
r, found := k.GetReputation(ctx, chainID, cluster, provider)
truncate := false
if found {
stabilizationPeriod := k.ReputationVarianceStabilizationPeriod(ctx)
if r.ShouldTruncate(stabilizationPeriod, ctx.BlockTime().UTC().Unix()) {
truncate = true
}
} else {
// new reputation score is not truncated and its decay factor is equal to 1
r = types.NewReputation(ctx)
}

// calculate the updated QoS epoch score
updatedEpochScore := r.EpochScore.Update(score, truncate, weight)

// update the reputation and set
r.EpochScore = updatedEpochScore
r.TimeLastUpdated = ctx.BlockTime().UTC().Unix()
r.Stake = stake
k.SetReputation(ctx, chainID, cluster, provider, r)
}

// GetReputationScore returns the current reputation pairing score
func (k Keeper) GetReputationScore(ctx sdk.Context, chainID string, cluster string, provider string) (val math.LegacyDec, found bool) {
block := uint64(ctx.BlockHeight())
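To make the relay-num weighting used above concrete, here is a minimal, self-contained sketch of a weight-based running aggregate. It is illustrative only: the epochScoreSketch type below is hypothetical and not part of this PR, and the real EpochScore.Update also tracks variance and handles truncation. It only shows why a relay session carrying more relays moves the epoch score further.

package main

import "fmt"

// epochScoreSketch is a hypothetical, simplified stand-in for the reputation's epoch score:
// a running mean of QoS scores weighted by the number of relays that produced them.
type epochScoreSketch struct {
score  float64 // aggregated score, lower is better
weight int64   // total relays folded into the aggregate so far
}

func (e *epochScoreSketch) update(score float64, relays int64) {
total := e.weight + relays
e.score = (e.score*float64(e.weight) + score*float64(relays)) / float64(total)
e.weight = total
}

func main() {
low := &epochScoreSketch{score: 100, weight: 10}
high := &epochScoreSketch{score: 100, weight: 10}
low.update(5, 1)    // a single relay barely moves the aggregate
high.update(5, 100) // many relays pull it strongly toward the new score
fmt.Println(low.score, high.score) // ~91.4 vs ~13.6
}
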
26 changes: 26 additions & 0 deletions x/pairing/types/QualityOfServiceReport.go
@@ -3,7 +3,9 @@ package types
import (
"fmt"

"cosmossdk.io/math"
sdk "github.com/cosmos/cosmos-sdk/types"
"github.com/lavanet/lava/v3/utils"
)

func (qos *QualityOfServiceReport) ComputeQoS() (sdk.Dec, error) {
@@ -24,3 +24,27 @@ func (qos *QualityOfServiceReport) ComputeQoSExcellence() (sdk.Dec, error) {
}
return qos.Availability.Quo(qos.Sync).Quo(qos.Latency).ApproxRoot(3)
}

// ComputeQosExcellenceForReputation computes the score from the QoS excellence report to update the provider's reputation
// report score = latency + sync*syncFactor + ((1/availability) - 1) * FailureCost (note: larger value is worse)
func (qos QualityOfServiceReport) ComputeQosExcellenceForReputation(syncFactor math.LegacyDec) (math.LegacyDec, error) {
if qos.Availability.LT(sdk.ZeroDec()) ||
qos.Latency.LT(sdk.ZeroDec()) ||
qos.Sync.LT(sdk.ZeroDec()) || syncFactor.LT(sdk.ZeroDec()) {
return sdk.ZeroDec(), utils.LavaFormatWarning("ComputeQosExcellenceForReputation: compute failed", fmt.Errorf("QoS excellence report has a value below 0"),
utils.LogAttr("availability", qos.Availability.String()),
utils.LogAttr("sync", qos.Sync.String()),
utils.LogAttr("latency", qos.Latency.String()),
)
}

latency := qos.Latency
sync := qos.Sync.Mul(syncFactor)
availability := math.LegacyNewDec(FailureCost)
if !qos.Availability.IsZero() {
availability = availability.Mul((math.LegacyOneDec().Quo(qos.Availability)).Sub(math.LegacyOneDec()))
} else {
availability = math.LegacyMaxSortableDec.QuoInt64(2) // when qos.Availability = 0 we take the largest score possible
}
return latency.Add(sync).Add(availability), nil
}
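
As a sanity check of the formula above, a worked example with illustrative numbers (FailureCost is a package constant; the value 3 below is an assumption used only for the arithmetic):

// latency = 500 (ms), sync = 15000 (ms), syncFactor = 0.3, availability = 0.99, FailureCost = 3 (assumed)
// score = 500 + 15000*0.3 + ((1/0.99) - 1)*3
//       = 500 + 4500 + 0.0303...
//       ≈ 5000.03   (lower is better)
// if availability is 0, the availability term becomes LegacyMaxSortableDec/2, i.e. the worst possible score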