-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This supports a consistent API for converting text into embeddings (vectors).
- Loading branch information
Showing
11 changed files
with
369 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
PATH | ||
remote: . | ||
specs: | ||
omniai (1.6.6) | ||
omniai (1.7.0) | ||
event_stream_parser | ||
http | ||
zeitwerk | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
# frozen_string_literal: true | ||
|
||
module OmniAI
  # An abstract class that provides a consistent interface for processing embedding requests.
  #
  # Subclasses supply the provider-specific `payload` and `path`; this class posts the request through the client's
  # connection and wraps the parsed body in a `Response`.
  #
  # Usage:
  #
  #   class OmniAI::OpenAI::Embed < OmniAI::Embed
  #     module Model
  #       SMALL = "text-embedding-3-small"
  #       LARGE = "text-embedding-3-large"
  #       ADA = "text-embedding-ada-002"
  #     end
  #
  #     protected
  #
  #     # @return [Hash]
  #     def payload
  #       { ... }
  #     end
  #
  #     # @return [String]
  #     def path
  #       "..."
  #     end
  #   end
  #
  #   client.embed(input, model: "...")
  class Embed
    # Builds an instance from the given arguments (see `#initialize`) and processes it immediately.
    #
    # @raise [HTTPError] if the response status is not OK
    # @return [Response]
    def self.process!(...)
      new(...).process!
    end

    # @param input [String] required
    # @param client [Client] the client
    # @param model [String] required
    def initialize(input, client:, model:)
      @input = input
      @client = client
      @model = model
    end

    # Sends the request and converts a successful reply into a `Response`.
    #
    # @raise [HTTPError] if the response status is not OK
    # @return [Response]
    def process!
      response = request!
      # NOTE(review): `flush` presumably drains the body before the error is raised - confirm against the HTTP client.
      raise HTTPError, response.flush unless response.status.ok?

      parse!(response:)
    end

    protected

    # @param response [HTTP::Response]
    # @return [Response]
    def parse!(response:)
      Response.new(data: response.parse)
    end

    # Posts `payload` as JSON to `path` using the client's connection.
    #
    # @return [HTTP::Response]
    def request!
      @client
        .connection
        .accept(:json)
        .post(path, json: payload)
    end

    # The request body; must be implemented by subclasses.
    #
    # @raise [NotImplementedError] unless overridden
    # @return [Hash]
    def payload
      raise NotImplementedError, "#{self.class.name}#payload undefined"
    end

    # The request path; must be implemented by subclasses.
    #
    # @raise [NotImplementedError] unless overridden
    # @return [String]
    def path
      raise NotImplementedError, "#{self.class.name}#path undefined"
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
# frozen_string_literal: true | ||
|
||
module OmniAI
  class Embed
    # The response returned by the API.
    #
    # Wraps the parsed response body and exposes the embeddings and token usage it contains. When a context with
    # matching deserializers is supplied, those are used instead of the default key lookups.
    class Response
      # @return [Hash]
      attr_reader :data

      # @param data [Hash]
      # @param context [OmniAI::Context] optional
      def initialize(data:, context: nil)
        @data = data
        @context = context
      end

      # Replaces the raw payload and drops anything memoized from the previous payload, so `usage` and `embeddings`
      # never report values derived from stale data.
      #
      # @param data [Hash]
      def data=(data)
        @usage = nil
        @embeddings = nil
        @data = data
      end

      # @return [String]
      def inspect
        "#<#{self.class.name}>"
      end

      # Token usage, built by the context's `:usage` deserializer when one is present, otherwise read from the
      # "usage" entry of the payload. The result is memoized.
      #
      # @return [Usage]
      def usage
        @usage ||= begin
          deserializer = @context&.deserializers&.[](:usage)

          if deserializer
            deserializer.call(@data, context: @context)
          else
            prompt_tokens = @data.dig('usage', 'prompt_tokens')
            total_tokens = @data.dig('usage', 'total_tokens')

            Usage.new(prompt_tokens:, total_tokens:)
          end
        end
      end

      # @param index [Integer] optional
      #
      # @return [Array<Float>, nil] nil when there is no embedding at the index
      def embedding(index: 0)
        embeddings[index]
      end

      # All embeddings, built by the context's `:embeddings` deserializer when one is present, otherwise read from
      # the "data" entries of the payload. The result is memoized.
      #
      # @return [Array<Array<Float>>]
      def embeddings
        @embeddings ||= begin
          deserializer = @context&.deserializers&.[](:embeddings)

          if deserializer
            deserializer.call(@data, context: @context)
          else
            # A payload without "data" yields no embeddings rather than a NoMethodError on nil.
            (@data['data'] || []).map { |entry| entry['embedding'] }
          end
        end
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# frozen_string_literal: true | ||
|
||
module OmniAI
  class Embed
    # Token usage returned by the API.
    class Usage
      # @return [Integer]
      attr_accessor :prompt_tokens

      # @return [Integer]
      attr_accessor :total_tokens

      # @param prompt_tokens [Integer]
      # @param total_tokens [Integer]
      def initialize(prompt_tokens:, total_tokens:)
        @prompt_tokens = prompt_tokens
        @total_tokens = total_tokens
      end

      # @return [String]
      def inspect
        "#<#{self.class.name} prompt_tokens=#{@prompt_tokens} total_tokens=#{@total_tokens}>"
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
# frozen_string_literal: true | ||
|
||
module OmniAI
  # The gem version. The diff showed the removed value ('1.6.6') next to the added one; only the added value is kept
  # so the constant is assigned exactly once.
  VERSION = '1.7.0'
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
# frozen_string_literal: true | ||
|
||
# Exercises OmniAI::Embed::Response with and without a context that supplies custom deserializers.
RSpec.describe OmniAI::Embed::Response do
  subject(:response) { described_class.new(data:, context:) }

  let(:context) { nil }
  let(:data) do
    {
      'data' => [{ 'embedding' => [0.0] }],
      'usage' => {
        'prompt_tokens' => 2,
        'total_tokens' => 4,
      },
    }
  end

  describe '#inspect' do
    it { expect(response.inspect).to eql('#<OmniAI::Embed::Response>') }
  end

  describe '#embedding' do
    context 'without a context' do
      let(:context) { nil }

      it { expect(response.embedding).to eql([0.0]) }
      it { expect(response.embedding(index: 0)).to eql([0.0]) }
    end

    context 'with a context' do
      let(:context) do
        OmniAI::Context.build do |context|
          context.deserializers[:embeddings] = lambda { |data, *|
            data['data'].map { |entry| entry['embedding'] }
          }
        end
      end

      it { expect(response.embedding).to eql([0.0]) }
    end
  end

  describe '#embeddings' do
    it { expect(response.embeddings).to eql([[0.0]]) }
  end

  describe '#usage' do
    it { expect(response.usage).to be_a(OmniAI::Embed::Usage) }
    it { expect(response.usage.prompt_tokens).to be(2) }
    it { expect(response.usage.total_tokens).to be(4) }

    context 'without a context' do
      let(:context) { nil }

      it { expect(response.usage).to be_a(OmniAI::Embed::Usage) }
      it { expect(response.usage.prompt_tokens).to be(2) }
      it { expect(response.usage.total_tokens).to be(4) }
    end

    context 'with a context' do
      let(:context) do
        OmniAI::Context.build do |context|
          context.deserializers[:usage] = lambda { |data, *|
            prompt_tokens = data['usage']['prompt_tokens']
            total_tokens = data['usage']['total_tokens']
            OmniAI::Embed::Usage.new(prompt_tokens:, total_tokens:)
          }
        end
      end

      it { expect(response.usage).to be_a(OmniAI::Embed::Usage) }
      it { expect(response.usage.prompt_tokens).to be(2) }
      it { expect(response.usage.total_tokens).to be(4) }
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# frozen_string_literal: true | ||
|
||
# Exercises the readers and the inspect output of OmniAI::Embed::Usage.
RSpec.describe OmniAI::Embed::Usage do
  subject(:usage) { described_class.new(prompt_tokens:, total_tokens:) }

  let(:prompt_tokens) { 2 }
  let(:total_tokens) { 4 }

  describe '#inspect' do
    it { expect(usage.inspect).to eql('#<OmniAI::Embed::Usage prompt_tokens=2 total_tokens=4>') }
  end

  describe '#prompt_tokens' do
    it { expect(usage.prompt_tokens).to be(2) }
  end

  describe '#total_tokens' do
    it { expect(usage.total_tokens).to be(4) }
  end
end
Oops, something went wrong.