diff --git a/Gemfile.lock b/Gemfile.lock index a3b24ee..bbd41c8 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -2,15 +2,13 @@ PATH remote: . specs: instructor-rb (0.1.0) - activemodel (~> 6.1) - activesupport (~> 6.1) + activesupport (~> 6.1.3) + dry-validation (~> 1.10) ruby-openai (~> 0.1.0) GEM remote: https://rubygems.org/ specs: - activemodel (6.1.7.6) - activesupport (= 6.1.7.6) activesupport (6.1.7.6) concurrent-ruby (~> 1.0, >= 1.0.2) i18n (>= 1.6, < 2) @@ -18,10 +16,44 @@ GEM tzinfo (~> 2.0) zeitwerk (~> 2.3) ast (2.4.2) + bigdecimal (3.1.6) coderay (1.1.3) concurrent-ruby (1.2.3) diff-lcs (1.5.0) dotenv (2.7.6) + dry-configurable (1.1.0) + dry-core (~> 1.0, < 2) + zeitwerk (~> 2.6) + dry-core (1.0.1) + concurrent-ruby (~> 1.0) + zeitwerk (~> 2.6) + dry-inflector (1.0.0) + dry-initializer (3.1.1) + dry-logic (1.5.0) + concurrent-ruby (~> 1.0) + dry-core (~> 1.0, < 2) + zeitwerk (~> 2.6) + dry-schema (1.13.3) + concurrent-ruby (~> 1.0) + dry-configurable (~> 1.0, >= 1.0.1) + dry-core (~> 1.0, < 2) + dry-initializer (~> 3.0) + dry-logic (>= 1.4, < 2) + dry-types (>= 1.7, < 2) + zeitwerk (~> 2.6) + dry-types (1.7.2) + bigdecimal (~> 3.0) + concurrent-ruby (~> 1.0) + dry-core (~> 1.0) + dry-inflector (~> 1.0) + dry-logic (~> 1.4) + zeitwerk (~> 2.6) + dry-validation (1.10.0) + concurrent-ruby (~> 1.0) + dry-core (~> 1.0, < 2) + dry-initializer (~> 3.0) + dry-schema (>= 1.12, < 2) + zeitwerk (~> 2.6) httparty (0.18.1) mime-types (~> 3.0) multi_xml (>= 0.5.2) diff --git a/README.md b/README.md index 8d517a6..995e0f4 100644 --- a/README.md +++ b/README.md @@ -23,37 +23,28 @@ Dive into the world of Ruby-based structured extraction, by OpenAI's function ca ## Usage -```rb -# Todo, change to ruby -import Instructor from "@instructor-ai/instructor"; -import OpenAI from "openai" -import { z } from "zod" - -const UserSchema = z.object({ - age: z.number(), - name: z.string() -}) - -type User = z.infer<typeof UserSchema> - -const oai = new OpenAI({ - apiKey: process.env.OPENAI_API_KEY ?? 
undefined, - organization: process.env.OPENAI_ORG_ID ?? undefined -}) - -const client = Instructor({ - client: oai, - mode: "FUNCTIONS" # or "TOOLS" or "MD_JSON" or "JSON" -}) - -const user = await client.chat.completions.create({ - messages: [{ role: "user", content: "Jason Liu is 30 years old" }], - model: "gpt-3.5-turbo", - response_model: { schema: UserSchema } -}) - -console.log(user) -// { age: 30, name: "Jason Liu" } +```ruby +require 'instructor' + +class UserDetail < Instructor::Model + params do + required(:name).filled(:string) + required(:age).filled(:integer) + end +end + +client = Instructor::OpenAI::Client.new + +user = client.chat( + parameters: { + model: 'gpt-3.5-turbo', + messages: [{ role: 'user', content: 'Extract Jason is 25 years old' }] + }, + response_model: UserDetail +) + +puts(user.inspect) +#=> #<Dry::Validation::Result{:name=>"Jason", :age=>25} errors={}> ``` ## Why use Instructor? diff --git a/instructor-rb.gemspec b/instructor-rb.gemspec index ced2c1b..7b1eee1 100644 --- a/instructor-rb.gemspec +++ b/instructor-rb.gemspec @@ -1,24 +1,24 @@ # frozen_string_literal: true -require_relative "lib/instructor/version" +require_relative 'lib/instructor/version' Gem::Specification.new do |spec| - spec.name = "instructor-rb" + spec.name = 'instructor-rb' spec.version = Instructor::VERSION - spec.authors = ["Jason Liu", "Sergio Bayona"] - spec.email = ["jason@jxnl.co", "bayona.sergio@gmail.com"] + spec.authors = ['Jason Liu', 'Sergio Bayona'] + spec.email = ['jason@jxnl.co', 'bayona.sergio@gmail.com'] - spec.summary = "Structured extraction in Ruby, powered by llms." + spec.summary = 'Structured extraction in Ruby, powered by llms.' spec.description = "Explore the power of structured extraction in Ruby with the Instructor gem. Leveraging OpenAI's function calling API." 
- spec.homepage = "https://github.com/instructor-ai/instructor-rb" - spec.license = "MIT" - spec.required_ruby_version = ">= 2.7.0" + spec.homepage = 'https://github.com/instructor-ai/instructor-rb' + spec.license = 'MIT' + spec.required_ruby_version = '>= 2.7.0' - spec.metadata["allowed_push_host"] = "https://rubygems.pkg.github.com/instructor-ai" + spec.metadata['allowed_push_host'] = 'https://rubygems.pkg.github.com/instructor-ai' - spec.metadata["homepage_uri"] = spec.homepage - spec.metadata["source_code_uri"] = "https://github.com/instructor-ai/instructor-rb" - spec.metadata["changelog_uri"] = "https://github.com/instructor-ai/instructor-rb/blob/main/CHANGELOG.md" + spec.metadata['homepage_uri'] = spec.homepage + spec.metadata['source_code_uri'] = 'https://github.com/instructor-ai/instructor-rb' + spec.metadata['changelog_uri'] = 'https://github.com/instructor-ai/instructor-rb/blob/main/CHANGELOG.md' # Specify which files should be added to the gem when it is released. # The `git ls-files -z` loads the files in the RubyGem that have been added into git. 
@@ -29,9 +29,9 @@ Gem::Specification.new do |spec| end end - spec.require_paths = ["lib"] + spec.require_paths = ['lib'] - spec.add_dependency "activemodel", "~> 6.1" - spec.add_dependency "activesupport", "~> 6.1" - spec.add_dependency "ruby-openai", "~> 0.1.0" + spec.add_dependency 'activesupport', '~> 6.1.3' + spec.add_dependency 'dry-validation', '~> 1.10' + spec.add_dependency 'ruby-openai', '~> 0.1.0' end diff --git a/lib/instructor.rb b/lib/instructor.rb index 591700b..57346dd 100644 --- a/lib/instructor.rb +++ b/lib/instructor.rb @@ -1,15 +1,11 @@ # frozen_string_literal: true -require "active_model" -require "active_support" -require_relative "instructor/version" -require_relative "instructor/type/array" -require_relative "instructor/model_serializer" -require_relative "instructor/base_model" +require 'openai' +require 'active_support/all' +require_relative 'instructor/version' +require_relative 'instructor/model' +require_relative 'instructor/openai/client' module Instructor class Error < StandardError; end - - # Register the custom array type with ActiveModel - ActiveModel::Type.register(:array, Instructor::Type::Array) end diff --git a/lib/instructor/base_model.rb b/lib/instructor/base_model.rb deleted file mode 100644 index 7ba7e4f..0000000 --- a/lib/instructor/base_model.rb +++ /dev/null @@ -1,11 +0,0 @@ -require 'active_model' - -module Instructor - class BaseModel - include ActiveModel::Model - include ActiveModel::Attributes - include ActiveModel::AttributeAssignment - include ActiveModel::Validations - include ActiveModel::Conversion - end -end diff --git a/lib/instructor/dsl/conditional_require.rb b/lib/instructor/dsl/conditional_require.rb deleted file mode 100644 index f14a0a7..0000000 --- a/lib/instructor/dsl/conditional_require.rb +++ /dev/null @@ -1,55 +0,0 @@ -require 'json' - -module Instructor - module DSL - class ConditionalRequire - def initialize - @structure = {} - end - - def if - @structure['if'] = {} - 
yield(Property.new(@structure['if'])) - end - - def then - @structure['then'] = {} - yield(Requirement.new(@structure['then'])) - end - - def else - @structure['else'] = {} - yield(Requirement.new(@structure['else'])) - end - - def to_json(*options) - @structure.to_json(*options) - end - - class Property - def initialize(structure) - @structure = structure - end - - def properties - @structure['properties'] = {} - yield(self) - end - - def method_missing(name, *args) - @structure['properties'][name.to_s] = { 'const' => args.first } - end - end - - class Requirement - def initialize(structure) - @structure = structure - end - - def required(*args) - @structure['required'] = args - end - end - end - end -end diff --git a/lib/instructor/model.rb b/lib/instructor/model.rb new file mode 100644 index 0000000..4dfb888 --- /dev/null +++ b/lib/instructor/model.rb @@ -0,0 +1,5 @@ +require 'dry-validation' + +Dry::Schema.load_extensions(:json_schema) +class Instructor::Model < Dry::Validation::Contract +end diff --git a/lib/instructor/model_serializer.rb b/lib/instructor/model_serializer.rb deleted file mode 100644 index ef5b550..0000000 --- a/lib/instructor/model_serializer.rb +++ /dev/null @@ -1,110 +0,0 @@ -require 'active_support/all' - -module Instructor - class ModelSerializer - DATETIME_TYPE_MAPPINGS = { - ActiveModel::Type::Date => 'date', - ActiveModel::Type::DateTime => 'date-time', - ActiveModel::Type::Time => 'time' - }.freeze - - TYPE_MAPPINGS = { - ActiveModel::Type::String => 'string', - ActiveModel::Type::Integer => 'integer', - ActiveModel::Type::Float => 'number', - ActiveModel::Type::Decimal => 'number', - ActiveModel::Type::Boolean => 'boolean', - ActiveModel::Type::Date => 'string', - ActiveModel::Type::DateTime => 'string', - ActiveModel::Type::Time => 'string', - Instructor::Type::Array => 'array' - }.freeze - - def initialize(model) - @model = model - end - - def json_schema - JSON.generate(build_schema) - end - - def build_schema - { - description: 
"#{model.name.underscore.humanize} model", - type: 'object', - properties: model_attributes, - required: required_attributes - }.compact - end - - private - - attr_reader :model - - def model_attributes - model.attribute_names.each_with_object({}) do |attr_name, attributes| - attribute_type = model.attribute_types[attr_name] - attributes[attr_name] = build_attribute_hash(attr_name, attribute_type) - end - end - - def build_attribute_hash(attr_name, attribute_type) - attribute_hash = { - title: attr_name.humanize, - type: json_type_for(attribute_type.class), - format: format_for(attribute_type), - default: default_value_for(attr_name), - enum: enum_for(attr_name) - }.compact - - if attribute_type.is_a?(Instructor::Type::Array) - attribute_hash[:items] = self.class.new(attribute_type.subtype).build_schema - end - - attribute_hash - end - - def format_for(attribute_type) - return unless datetime_format?(attribute_type) - - DATETIME_TYPE_MAPPINGS[attribute_type.class] - end - - def enum_for(attr_name) - inclusion_validator = model.validators_on(attr_name).detect do |validator| - validator.is_a?(ActiveModel::Validations::InclusionValidator) - end - inclusion_validator&.options&.[](:in) - end - - def datetime_format?(attribute_type) - DATETIME_TYPE_MAPPINGS.keys.include?(attribute_type.class) - end - - def default_value_for(attr_name) - model._default_attributes[attr_name].value_before_type_cast - end - - def default_value?(attr_name) - !model._default_attributes[attr_name].value_before_type_cast.nil? - end - - def required_attributes - attributes = model.attribute_names.select do |attr_name| - required?(attr_name) - end - - attributes.empty? ? nil : attributes - end - - def required?(attr_name) - model.validators_on(attr_name).any? 
do |validator| - validator.is_a?(ActiveModel::Validations::PresenceValidator) - end - end - - def json_type_for(type) - TYPE_MAPPINGS[type] || 'string' # default type - end - end -end diff --git a/lib/instructor/openai/client.rb b/lib/instructor/openai/client.rb new file mode 100644 index 0000000..1c01fd2 --- /dev/null +++ b/lib/instructor/openai/client.rb @@ -0,0 +1,44 @@ +OpenAI.configure do |config| + config.access_token = ENV.fetch('OPENAI_API_KEY') +end + +module Instructor + module OpenAI + class Client + def initialize + @client = ::OpenAI::Client.new do |f| + f.response :logger, Logger.new($stdout), bodies: true if ENV['OPENAI_LOG'] == 'debug' + end + end + + def chat(parameters:, response_model:) + func = generate_function(response_model) + params = parameters.merge(tools: [func]) + response = @client.chat(parameters: params) + function_response = get_parsed_res(response) + model = response_model.new + model.call(function_response) + end + + def generate_function(model) + { + type: 'function', + function: { + name: model.name.humanize.titleize, + description: generate_description(model), + parameters: model.schema.json_schema + } + } + end + + def get_parsed_res(response) + str = response.dig('choices', 0, 'message', 'tool_calls', 0, 'function', 'arguments') + JSON.parse(str) + end + + def generate_description(model) + "Correctly extracted `#{model.name}` with all the required parameters with correct types" + end + end + end +end diff --git a/lib/instructor/type/array.rb b/lib/instructor/type/array.rb deleted file mode 100644 index cabcf8c..0000000 --- a/lib/instructor/type/array.rb +++ /dev/null @@ -1,70 +0,0 @@ -# frozen_string_literal: true - -# Custom type for arrays. This is needed because the default ActiveModel gem does not have -# sufficient support for arrays. 
-# -# -# @example -# class PhoneNumber < Instructor::BaseModel -# attribute :number, :string -# end -# -# class User < Instructor::BaseModel -# attribute :phone_numbers, :array, of: PhoneNumber, default: [] -# end -# -# Instructor::ModelSerializer.new(User).json_schema -# # => { -# # "description": "User model", -# # "type": "object", -# # "properties": { -# # "phone_numbers": { -# # "title": "Phone numbers", -# # "type": "array", -# # "default": [], -# # "items": { -# # "description": "Phone number model", -# # "type": "object", -# # "properties": { -# # "number": { -# # "title": "Number", -# # "type": "string" -# # } -# # } -# # } -# # } -# # } -# # } -# -# - -module Instructor - module Type - class Array < ActiveModel::Type::Value - attr_reader :subtype - - def initialize(options={}) - super() - @subtype = options.delete(:of) - end - - def type - :array - end - - def cast(value) - return [] if value.blank? - - value - end - - def deserialize(value) - value - end - - def serialize(value) - value - end - end - end -end diff --git a/spec/basic_spec.rb b/spec/basic_spec.rb new file mode 100644 index 0000000..062cd91 --- /dev/null +++ b/spec/basic_spec.rb @@ -0,0 +1,25 @@ +require 'spec_helper' + +RSpec.describe 'running an OpenAI function call' do + class UserDetail < Instructor::Model + params do + required(:name).filled(:string) + required(:age).filled(:integer) + end + end + + it 'returns an object with the expected valid attribute values' do + client = Instructor::OpenAI::Client.new + + user = client.chat( + parameters: { + model: 'gpt-3.5-turbo', + messages: [{ role: 'user', content: 'Extract Jason is 25 years old' }] + }, + response_model: UserDetail + ) + + expect(user[:name]).to eq('Jason') + expect(user[:age]).to eq(25) + end +end diff --git a/spec/dsl/conditional_require_spec.rb b/spec/dsl/conditional_require_spec.rb deleted file mode 100644 index 6d87f8e..0000000 --- a/spec/dsl/conditional_require_spec.rb +++ /dev/null @@ -1,81 +0,0 @@ -require 
'spec_helper' -require_relative '../../lib/instructor/dsl/conditional_require' - -# rubocop:disable Metrics/BlockLength -RSpec.describe Instructor::DSL::ConditionalRequire do - context 'if/then' do - let(:expected_output) do - { - 'if' => { - 'properties' => { - 'type' => { - "const": 'car' - } - } - }, - 'then' => { - "required": %w[ - make - model - ] - } - } - end - - let(:vehicle) { described_class.new } - - it 'outputs if/then clause in json-schema' do - vehicle.if do |condition| - condition.properties do |attr| - attr.type 'car' - end - end - - vehicle.then do |requirement| - requirement.required 'make', 'model' - end - - expect(vehicle.to_json).to eq(expected_output.to_json) - end - end - - context 'if/then/else' do - let(:vehicle) { described_class.new } - - let(:expected_output) do - { - 'if' => { - 'properties' => { - 'type' => { - "const": 'car' - } - } - }, - 'then' => { - "required": %w[ - make - model - ] - } - } - end - - it 'outputs if/then/else clause in json-schema' do - vehicle.if do |condition| - condition.properties do |attr| - attr.type 'car' - end - end - - vehicle.then do |requirement| - requirement.required 'make', 'model' - end - - vehicle.else do |requirement| - requirement.required 'make' - end - - expect(vehicle.to_json).to eq(expected_output.to_json) - end - end -end diff --git a/spec/json_schema_generation_spec.rb b/spec/json_schema_generation_spec.rb deleted file mode 100644 index 13000cf..0000000 --- a/spec/json_schema_generation_spec.rb +++ /dev/null @@ -1,92 +0,0 @@ -require "spec_helper" - -class PhoneNumber < Instructor::BaseModel - attribute :number, :string - attribute :type, :string - - validates :type, inclusion: { in: %w[home work mobile] } -end - -class User < Instructor::BaseModel - attribute :name, :string - attribute :age, :integer - attribute :subscribed, :boolean, default: false - attribute :created_at, :datetime - attribute :gender, :string - attribute :phone_numbers, :array, of: PhoneNumber, default: [] - - 
validates :name, :age, presence: true - validates :gender, inclusion: { in: %w[male female other] } -end - -RSpec.describe "json-schema serialization" do - subject(:user_detail) { Instructor::ModelSerializer.new(User).json_schema } - - let(:user_schema) do - { - "description": "User model", - "type": "object", - "properties": { - "name": { - "title": "Name", - "type": "string" - }, - "age": { - "title": "Age", - "type": "integer" - }, - "subscribed": { - "title": "Subscribed", - "type": "boolean", - "default": false - }, - "created_at": { - "title": "Created at", - "type": "string", - "format": "date-time" - }, - "gender": { - "title": "Gender", - "type": "string", - "enum": %w[ - male - female - other - ] - }, - "phone_numbers": { - "title": "Phone numbers", - "type": "array", - "default": [], - "items": { - "description": "Phone number model", - "type": "object", - "properties": { - "number": { - "title": "Number", - "type": "string" - }, - "type": { - "title": "Type", - "type": "string", - "enum": %w[ - home - work - mobile - ] - } - } - } - } - }, - "required": %w[ - name - age - ] - } - end - - it "converts to json schema" do - expect(user_detail).to eq(JSON.generate(user_schema)) - end -end diff --git a/spec/type/array_spec.rb b/spec/type/array_spec.rb deleted file mode 100644 index f269502..0000000 --- a/spec/type/array_spec.rb +++ /dev/null @@ -1,31 +0,0 @@ -require "spec_helper" - -RSpec.describe Instructor::Type::Array do - subject(:array_type) { described_class.new } - - it "has type array" do - expect(array_type.type).to eq(:array) - end - - describe "#cast" do - it "returns the expected values" do - expect(array_type.cast([1, 2, 3])).to eq([1, 2, 3]) - end - - it "returns an empty array for nil" do - expect(array_type.cast(nil)).to eq([]) - end - - it "returns an empty array for empty string" do - expect(array_type.cast("")).to eq([]) - end - - context "when the array values are strings" do - subject(:array_type) { described_class.new(of: :string) } - 
- it "returns the expected values" do - expect(array_type.cast(%w[1 2 3])).to eq(%w[1 2 3]) - end - end - end -end