diff --git a/docs/core_docs/docs/integrations/chat/anthropic.ipynb b/docs/core_docs/docs/integrations/chat/anthropic.ipynb
index 6b431821c4db..081fc87c6f2c 100644
--- a/docs/core_docs/docs/integrations/chat/anthropic.ipynb
+++ b/docs/core_docs/docs/integrations/chat/anthropic.ipynb
@@ -509,7 +509,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 2,
    "id": "bba739ed",
    "metadata": {},
    "outputs": [],
@@ -783,7 +783,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 3,
    "id": "6e47de9b",
    "metadata": {},
    "outputs": [
@@ -793,16 +793,15 @@
      "text": [
       "USAGE: {\n",
       "  input_tokens: 19,\n",
-      "  cache_creation_input_tokens: 2925,\n",
+      "  cache_creation_input_tokens: 2921,\n",
       "  cache_read_input_tokens: 0,\n",
-      "  output_tokens: 327\n",
+      "  output_tokens: 355\n",
       "}\n"
      ]
     }
    ],
    "source": [
     "import { ChatAnthropic } from \"@langchain/anthropic\";\n",
-    "import { HumanMessage, SystemMessage } from \"@langchain/core/messages\";\n",
     "\n",
     "const modelWithCaching = new ChatAnthropic({\n",
     "  model: \"claude-3-haiku-20240307\",\n",
@@ -820,7 +819,8 @@
     "${CACHED_TEXT}`;\n",
     "\n",
     "const messages = [\n",
-    "  new SystemMessage({\n",
+    "  {\n",
+    "    role: \"system\",\n",
     "    content: [\n",
     "      {\n",
     "        type: \"text\",\n",
@@ -829,10 +829,11 @@
     "        cache_control: { type: \"ephemeral\" },\n",
     "      },\n",
     "    ],\n",
-    "  }),\n",
-    "  new HumanMessage({\n",
+    "  },\n",
+    "  {\n",
+    "    role: \"user\",\n",
     "    content: \"What types of messages are supported in LangChain?\",\n",
-    "  }),\n",
+    "  },\n",
     "];\n",
     "\n",
     "const res = await modelWithCaching.invoke(messages);\n",
@@ -852,7 +853,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 4,
    "id": "5d264f8b",
    "metadata": {},
    "outputs": [
@@ -863,8 +864,8 @@
      "USAGE: {\n",
      "  input_tokens: 19,\n",
      "  cache_creation_input_tokens: 0,\n",
-     "  cache_read_input_tokens: 2925,\n",
-     "  output_tokens: 250\n",
+     "  cache_read_input_tokens: 2921,\n",
+     "  output_tokens: 357\n",
      "}\n"
     ]
    }
@@ -961,6 +962,198 @@
     "await modelWithCustomClient.invoke([{ role: \"user\", content: \"Hello!\" }]);"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "68a85a61",
+   "metadata": {},
+   "source": [
+    "## Citations\n",
+    "\n",
+    "Anthropic supports a [citations](https://docs.anthropic.com/en/docs/build-with-claude/citations) feature that lets Claude attach context to its answers based on source documents supplied by the user. When [document content blocks](https://docs.anthropic.com/en/docs/build-with-claude/citations#document-types) with `\"citations\": {\"enabled\": true}` are included in a query, Claude may generate citations in its response.\n",
+    "\n",
+    "### Simple example\n",
+    "\n",
+    "In this example we pass a [plain text document](https://docs.anthropic.com/en/docs/build-with-claude/citations#plain-text-documents). In the background, Claude [automatically chunks](https://docs.anthropic.com/en/docs/build-with-claude/citations#plain-text-documents) the input text into sentences, which are used when generating citations."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "d3f1c754",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[\n",
+      "  {\n",
+      "    \"type\": \"text\",\n",
+      "    \"text\": \"Based on the document, I can tell you that:\\n\\n- \"\n",
+      "  },\n",
+      "  {\n",
+      "    \"type\": \"text\",\n",
+      "    \"text\": \"The grass is green\",\n",
+      "    \"citations\": [\n",
+      "      {\n",
+      "        \"type\": \"char_location\",\n",
+      "        \"cited_text\": \"The grass is green. \",\n",
+      "        \"document_index\": 0,\n",
+      "        \"document_title\": \"My Document\",\n",
+      "        \"start_char_index\": 0,\n",
+      "        \"end_char_index\": 20\n",
+      "      }\n",
+      "    ]\n",
+      "  },\n",
+      "  {\n",
+      "    \"type\": \"text\",\n",
+      "    \"text\": \"\\n- \"\n",
+      "  },\n",
+      "  {\n",
+      "    \"type\": \"text\",\n",
+      "    \"text\": \"The sky is blue\",\n",
+      "    \"citations\": [\n",
+      "      {\n",
+      "        \"type\": \"char_location\",\n",
+      "        \"cited_text\": \"The sky is blue.\",\n",
+      "        \"document_index\": 0,\n",
+      "        \"document_title\": \"My Document\",\n",
+      "        \"start_char_index\": 20,\n",
+      "        \"end_char_index\": 36\n",
+      "      }\n",
+      "    ]\n",
+      "  }\n",
+      "]\n"
+     ]
+    }
+   ],
+   "source": [
+    "import { ChatAnthropic } from \"@langchain/anthropic\";\n",
+    "\n",
+    "const citationsModel = new ChatAnthropic({\n",
+    "  model: \"claude-3-5-haiku-latest\",\n",
+    "});\n",
+    "\n",
+    "const messagesWithCitations = [\n",
+    "  {\n",
+    "    role: \"user\",\n",
+    "    content: [\n",
+    "      {\n",
+    "        type: \"document\",\n",
+    "        source: {\n",
+    "          type: \"text\",\n",
+    "          media_type: \"text/plain\",\n",
+    "          data: \"The grass is green. The sky is blue.\",\n",
+    "        },\n",
+    "        title: \"My Document\",\n",
+    "        context: \"This is a trustworthy document.\",\n",
+    "        citations: {\n",
+    "          enabled: true,\n",
+    "        },\n",
+    "      },\n",
+    "      {\n",
+    "        type: \"text\",\n",
+    "        text: \"What color is the grass and sky?\",\n",
+    "      },\n",
+    "    ],\n",
+    "  }\n",
+    "];\n",
+    "\n",
+    "const responseWithCitations = await citationsModel.invoke(messagesWithCitations);\n",
+    "\n",
+    "console.log(JSON.stringify(responseWithCitations.content, null, 2));"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "14269f15",
+   "metadata": {},
+   "source": [
+    "### Using with text splitters\n",
+    "\n",
+    "Anthropic also lets you specify your own splits using [custom document](https://docs.anthropic.com/en/docs/build-with-claude/citations#custom-content-documents) types. LangChain [text splitters](/docs/concepts/text_splitters/) can be used to generate meaningful splits for this purpose. See the example below, where we split the LangChain.js README (a markdown document) and pass it to Claude as context:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5e9f3213",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[\n",
+      "  {\n",
+      "    \"type\": \"text\",\n",
+      "    \"text\": \"Based on the documentation, I can provide you with a link to LangChain's tutorials:\\n\\n\"\n",
+      "  },\n",
+      "  {\n",
+      "    \"type\": \"text\",\n",
+      "    \"text\": \"The tutorials can be found at: https://js.langchain.com/docs/tutorials/\",\n",
+      "    \"citations\": [\n",
+      "      {\n",
+      "        \"type\": \"content_block_location\",\n",
+      "        \"cited_text\": \"[Tutorial](https://js.langchain.com/docs/tutorials/)walkthroughs\",\n",
+      "        \"document_index\": 0,\n",
+      "        \"document_title\": null,\n",
+      "        \"start_block_index\": 191,\n",
+      "        \"end_block_index\": 194\n",
+      "      }\n",
+      "    ]\n",
+      "  }\n",
+      "]\n"
+     ]
+    }
+   ],
+   "source": [
+    "import { ChatAnthropic } from \"@langchain/anthropic\";\n",
+    "import { MarkdownTextSplitter } from \"langchain/text_splitter\";\n",
+    "\n",
+    "function formatToAnthropicDocuments(documents: string[]) {\n",
+    "  return {\n",
+    "    type: \"document\",\n",
+    "    source: {\n",
+    "      type: \"content\",\n",
+    "      content: documents.map((document) => ({ type: \"text\", text: document })),\n",
+    "    },\n",
+    "    citations: { enabled: true },\n",
+    "  };\n",
+    "}\n",
+    "\n",
+    "// Pull readme\n",
+    "const readmeResponse = await fetch(\n",
+    "  \"https://raw.githubusercontent.com/langchain-ai/langchainjs/master/README.md\"\n",
+    ");\n",
+    "\n",
+    "const readme = await readmeResponse.text();\n",
+    "\n",
+    "// Split into chunks\n",
+    "const splitter = new MarkdownTextSplitter({\n",
+    "  chunkOverlap: 0,\n",
+    "  chunkSize: 50,\n",
+    "});\n",
+    "const documents = await splitter.splitText(readme);\n",
+    "\n",
+    "// Construct message\n",
+    "const messageWithSplitDocuments = {\n",
+    "  role: \"user\",\n",
+    "  content: [\n",
+    "    formatToAnthropicDocuments(documents),\n",
+    "    { type: \"text\", text: \"Give me a link to LangChain's tutorials. Cite your sources\" },\n",
+    "  ],\n",
+    "};\n",
+    "\n",
+    "// Query LLM\n",
+    "const citationsModelWithSplits = new ChatAnthropic({\n",
+    "  model: \"claude-3-5-sonnet-latest\",\n",
+    "});\n",
+    "const resWithSplits = await citationsModelWithSplits.invoke([messageWithSplitDocuments]);\n",
+    "\n",
+    "console.log(JSON.stringify(resWithSplits.content, null, 2));"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "3a5bb5ca-c3ae-4a58-be67-2cd18574b9a3",
@@ -992,4 +1185,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 5
-}
\ No newline at end of file
+}
diff --git a/libs/langchain-anthropic/package.json b/libs/langchain-anthropic/package.json
index 6d3aa16b109a..5b6a0f4b8bae 100644
--- a/libs/langchain-anthropic/package.json
+++ b/libs/langchain-anthropic/package.json
@@ -35,7 +35,7 @@
   "author": "LangChain",
   "license": "MIT",
   "dependencies": {
-    "@anthropic-ai/sdk": "^0.32.1",
+    "@anthropic-ai/sdk": "^0.36.3",
    "fast-xml-parser": "^4.4.1",
    "zod": "^3.22.4",
    "zod-to-json-schema": "^3.22.4"
diff --git a/libs/langchain-anthropic/src/chat_models.ts b/libs/langchain-anthropic/src/chat_models.ts
index 480de0a7c57e..c63a16f0ae66 100644
--- a/libs/langchain-anthropic/src/chat_models.ts
+++ b/libs/langchain-anthropic/src/chat_models.ts
@@ -25,10 +25,6 @@
 } from "@langchain/core/runnables";
 import { isZodSchema } from "@langchain/core/utils/types";
 import { z } from "zod";
-import type {
-  MessageCreateParams,
-  Tool as AnthropicTool,
-} from "@anthropic-ai/sdk/resources/messages";
 import { isLangChainTool } from "@langchain/core/utils/function_calling";
 
 import { AnthropicToolsOutputParser } from "./output_parsers.js";
@@ -64,12 +60,36 @@
   headers?: Record;
 }
 
-function _toolsInParams(params: AnthropicMessageCreateParams): boolean {
+function _toolsInParams(
+  params: AnthropicMessageCreateParams | AnthropicStreamingMessageCreateParams
+): boolean {
   return !!(params.tools && params.tools.length > 0);
 }
 
+function _documentsInParams(
+  params: AnthropicMessageCreateParams | AnthropicStreamingMessageCreateParams
+): boolean {
+  for (const message of params.messages ?? []) {
+    if (typeof message.content === "string") {
+      continue;
+    }
+    for (const block of message.content ?? []) {
+      if (
+        typeof block === "object" &&
+        block != null &&
+        block.type === "document" &&
+        typeof block.citations === "object" &&
+        block.citations.enabled
+      ) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
 // eslint-disable-next-line @typescript-eslint/no-explicit-any
-function isAnthropicTool(tool: any): tool is AnthropicTool {
+function isAnthropicTool(tool: any): tool is Anthropic.Messages.Tool {
   return "input_schema" in tool;
 }
 
@@ -685,7 +705,7 @@
    */
   formatStructuredToolToAnthropic(
     tools: ChatAnthropicCallOptions["tools"]
-  ): AnthropicTool[] | undefined {
+  ): Anthropic.Messages.Tool[] | undefined {
     if (!tools || !tools.length) {
       return undefined;
     }
@@ -697,7 +717,8 @@
         return {
           name: tool.function.name,
           description: tool.function.description,
-          input_schema: tool.function.parameters as AnthropicTool.InputSchema,
+          input_schema: tool.function
+            .parameters as Anthropic.Messages.Tool.InputSchema,
         };
       }
       if (isLangChainTool(tool)) {
@@ -706,7 +727,7 @@
           description: tool.description,
           input_schema: zodToJsonSchema(
             tool.schema
-          ) as AnthropicTool.InputSchema,
+          ) as Anthropic.Messages.Tool.InputSchema,
         };
       }
       throw new Error(
@@ -740,9 +761,9 @@
   > &
     Kwargs {
     const tool_choice:
-      | MessageCreateParams.ToolChoiceAuto
-      | MessageCreateParams.ToolChoiceAny
-      | MessageCreateParams.ToolChoiceTool
+      | Anthropic.Messages.ToolChoiceAuto
+      | Anthropic.Messages.ToolChoiceAny
+      | Anthropic.Messages.ToolChoiceTool
       | undefined = handleToolChoice(options?.tool_choice);
 
     return {
@@ -784,22 +805,17 @@
   ): AsyncGenerator {
     const params = this.invocationParams(options);
     const formattedMessages = _convertMessagesToAnthropicPayload(messages);
-    const coerceContentToString = !_toolsInParams({
+    const payload = {
       ...params,
       ...formattedMessages,
-      stream: false,
-    });
+      stream: true,
+    } as const;
+    const coerceContentToString =
+      !_toolsInParams(payload) && !_documentsInParams(payload);
 
-    const stream = await this.createStreamWithRetry(
-      {
-        ...params,
-        ...formattedMessages,
-        stream: true,
-      },
-      {
-        headers: options.headers,
-      }
-    );
+    const stream = await this.createStreamWithRetry(payload, {
+      headers: options.headers,
+    });
 
     for await (const data of stream) {
       if (options.signal?.aborted) {
@@ -1041,7 +1057,7 @@
     let functionName = name ?? "extract";
     let outputParser: BaseLLMOutputParser;
-    let tools: AnthropicTool[];
+    let tools: Anthropic.Messages.Tool[];
     if (isZodSchema(schema)) {
       const jsonSchema = zodToJsonSchema(schema);
       tools = [
@@ -1049,7 +1065,7 @@
           name: functionName,
           description:
             jsonSchema.description ?? "A function available to call.",
-          input_schema: jsonSchema as AnthropicTool.InputSchema,
+          input_schema: jsonSchema as Anthropic.Messages.Tool.InputSchema,
         },
       ];
       outputParser = new AnthropicToolsOutputParser({
@@ -1058,20 +1074,20 @@
         zodSchema: schema,
       });
     } else {
-      let anthropicTools: AnthropicTool;
+      let anthropicTools: Anthropic.Messages.Tool;
       if (
         typeof schema.name === "string" &&
         typeof schema.description === "string" &&
         typeof schema.input_schema === "object" &&
         schema.input_schema != null
       ) {
-        anthropicTools = schema as AnthropicTool;
+        anthropicTools = schema as Anthropic.Messages.Tool;
         functionName = schema.name;
       } else {
        anthropicTools = {
          name: functionName,
          description: schema.description ?? "",
-          input_schema: schema as AnthropicTool.InputSchema,
+          input_schema: schema as Anthropic.Messages.Tool.InputSchema,
        };
      }
      tools = [anthropicTools];
diff --git a/libs/langchain-anthropic/src/tests/chat_models.int.test.ts b/libs/langchain-anthropic/src/tests/chat_models.int.test.ts
index 20618a14139c..a8983b29dc53 100644
--- a/libs/langchain-anthropic/src/tests/chat_models.int.test.ts
+++ b/libs/langchain-anthropic/src/tests/chat_models.int.test.ts
@@ -868,3 +868,64 @@
 
   expect(response.content.length).toBeGreaterThan(10);
 });
+
+test("Citations", async () => {
+  const citationsModel = new ChatAnthropic({
+    model: "claude-3-5-sonnet-latest",
+  });
+
+  const messages = [
+    {
+      role: "user",
+      content: [
+        {
+          type: "document",
+          source: {
+            type: "text",
+            media_type: "text/plain",
+            data: "The grass the user is asking about is bluegrass. The sky is orange because it's night.",
+          },
+          title: "My Document",
+          context: "This is a trustworthy document.",
+          citations: {
+            enabled: true,
+          },
+        },
+        {
+          type: "text",
+          text: "What color is the grass and sky?",
+        },
+      ],
+    },
+  ];
+
+  const response = await citationsModel.invoke(messages);
+
+  expect(response.content.length).toBeGreaterThan(2);
+  expect(Array.isArray(response.content)).toBe(true);
+  const blocksWithCitations = (response.content as any[]).filter(
+    (block) => block.citations !== undefined
+  );
+  expect(blocksWithCitations.length).toEqual(2);
+  expect(typeof blocksWithCitations[0].citations[0]).toEqual("object");
+
+  const stream = await citationsModel.stream(messages);
+  let aggregated;
+  let chunkHasCitation = false;
+  for await (const chunk of stream) {
+    aggregated = aggregated === undefined ? chunk : concat(aggregated, chunk);
+    if (
+      !chunkHasCitation &&
+      Array.isArray(chunk.content) &&
+      chunk.content.some((c: any) => c.citations !== undefined)
+    ) {
+      chunkHasCitation = true;
+    }
+  }
+  expect(chunkHasCitation).toBe(true);
+  expect(Array.isArray(aggregated?.content)).toBe(true);
+  expect(aggregated?.content.length).toBeGreaterThan(2);
+  expect(
+    (aggregated?.content as any[]).some((c) => c.citations !== undefined)
+  ).toBe(true);
+});
diff --git a/libs/langchain-anthropic/src/types.ts b/libs/langchain-anthropic/src/types.ts
index 18315b4c7932..f1fb1d778967 100644
--- a/libs/langchain-anthropic/src/types.ts
+++ b/libs/langchain-anthropic/src/types.ts
@@ -1,5 +1,4 @@
 import Anthropic from "@anthropic-ai/sdk";
-import type { Tool as AnthropicTool } from "@anthropic-ai/sdk/resources/messages";
 import { BindToolsInput } from "@langchain/core/language_models/chat_models";
 
 export type AnthropicToolResponse = {
@@ -28,9 +27,10 @@
   | "auto"
   | "none"
   | string;
-export type ChatAnthropicToolType = AnthropicTool | BindToolsInput;
+export type ChatAnthropicToolType = Anthropic.Messages.Tool | BindToolsInput;
 
 export type AnthropicTextBlockParam = Anthropic.Messages.TextBlockParam;
 export type AnthropicImageBlockParam = Anthropic.Messages.ImageBlockParam;
 export type AnthropicToolUseBlockParam = Anthropic.Messages.ToolUseBlockParam;
 export type AnthropicToolResultBlockParam = Anthropic.Messages.ToolResultBlockParam;
+export type AnthropicDocumentBlockParam = Anthropic.Messages.DocumentBlockParam;
diff --git a/libs/langchain-anthropic/src/utils/message_inputs.ts b/libs/langchain-anthropic/src/utils/message_inputs.ts
index df44e296901f..ab11532aad36 100644
--- a/libs/langchain-anthropic/src/utils/message_inputs.ts
+++ b/libs/langchain-anthropic/src/utils/message_inputs.ts
@@ -18,6 +18,7 @@
   AnthropicToolResponse,
   AnthropicToolResultBlockParam,
   AnthropicToolUseBlockParam,
+  AnthropicDocumentBlockParam,
 } from "../types.js";
 
 function _formatImage(imageUrl: string) {
@@ -134,8 +135,7 @@
       } else if (contentPart.type === "document") {
         // PDF
         return {
-          type: "document",
-          source: contentPart.source,
+          ...contentPart,
           ...(cacheControl ? { cache_control: cacheControl } : {}),
         };
       } else if (
@@ -282,12 +282,14 @@ function mergeMessages(messages: AnthropicMessageCreateParams["messages"]) {
       | AnthropicImageBlockParam
       | AnthropicToolUseBlockParam
       | AnthropicToolResultBlockParam
+      | AnthropicDocumentBlockParam
     >
   ): Array<
     | AnthropicTextBlockParam
     | AnthropicImageBlockParam
     | AnthropicToolUseBlockParam
     | AnthropicToolResultBlockParam
+    | AnthropicDocumentBlockParam
   > => {
     if (typeof content === "string") {
       return [
diff --git a/libs/langchain-anthropic/src/utils/message_outputs.ts b/libs/langchain-anthropic/src/utils/message_outputs.ts
index a3a6806de99c..e96376f73597 100644
--- a/libs/langchain-anthropic/src/utils/message_outputs.ts
+++ b/libs/langchain-anthropic/src/utils/message_outputs.ts
@@ -7,6 +7,7 @@
   AIMessageChunk,
   UsageMetadata,
 } from "@langchain/core/messages";
+import type { ToolCallChunk } from "@langchain/core/messages/tool";
 import { ChatGeneration } from "@langchain/core/outputs";
 import { AnthropicMessageResponse } from "../types.js";
 import { extractToolCalls } from "../output_parsers.js";
@@ -76,10 +77,22 @@
     };
   } else if (
     data.type === "content_block_start" &&
-    data.content_block.type === "tool_use"
+    ["tool_use", "document"].includes(data.content_block.type)
   ) {
-    const toolCallContentBlock =
-      data.content_block as Anthropic.Messages.ToolUseBlock;
+    const contentBlock = data.content_block;
+    let toolCallChunks: ToolCallChunk[];
+    if (contentBlock.type === "tool_use") {
+      toolCallChunks = [
+        {
+          id: contentBlock.id,
+          index: data.index,
+          name: contentBlock.name,
+          args: "",
+        },
+      ];
+    } else {
+      toolCallChunks = [];
+    }
     return {
       chunk: new AIMessageChunk({
         content: fields.coerceContentToString
@@ -92,33 +105,29 @@
             },
           ],
         additional_kwargs: {},
-        tool_call_chunks: [
-          {
-            id: toolCallContentBlock.id,
-            index: data.index,
-            name: toolCallContentBlock.name,
-            args: "",
-          },
-        ],
+        tool_call_chunks: toolCallChunks,
       }),
     };
   } else if (
     data.type === "content_block_delta" &&
-    data.delta.type === "text_delta"
+    ["text_delta", "citations_delta"].includes(data.delta.type)
   ) {
-    const content = data.delta?.text;
-    if (content !== undefined) {
+    if (fields.coerceContentToString && "text" in data.delta) {
       return {
         chunk: new AIMessageChunk({
-          content: fields.coerceContentToString
-            ? content
-            : [
-                {
-                  index: data.index,
-                  ...data.delta,
-                },
-              ],
-          additional_kwargs: {},
+          content: data.delta.text,
+        }),
+      };
+    } else {
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      const contentBlock: Record = data.delta;
+      if ("citation" in contentBlock) {
+        contentBlock.citations = [contentBlock.citation];
+        delete contentBlock.citation;
+      }
+      return {
+        chunk: new AIMessageChunk({
+          content: [{ index: data.index, ...contentBlock, type: "text" }],
        }),
      };
    }
diff --git a/yarn.lock b/yarn.lock
index fefd13294652..b61175d49943 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -241,9 +241,9 @@
   languageName: node
   linkType: hard
 
-"@anthropic-ai/sdk@npm:^0.32.1":
-  version: 0.32.1
-  resolution: "@anthropic-ai/sdk@npm:0.32.1"
+"@anthropic-ai/sdk@npm:^0.36.3":
+  version: 0.36.3
+  resolution: "@anthropic-ai/sdk@npm:0.36.3"
   dependencies:
     "@types/node": ^18.11.18
     "@types/node-fetch": ^2.6.4
@@ -252,7 +252,7 @@
     form-data-encoder: 1.7.2
     formdata-node: ^4.3.2
     node-fetch: ^2.6.7
-  checksum: b48982e0ce066c99afe19448c5d6b38916c2c8873fbdcd4e5116abc45bcf241359604684856bfbd20fcfe00bf544d6d6e7bcf2686a9eb198bd671839b5cd0a67
+  checksum: 783a051fea42bb8cc52f92b1aaf1d5d1fb9412f8cc54c0cc3b4f6b5c3b97056bcdebc7c17d0b7efca67a8cf92642e2c9c50d49af80d2bd33b4cf286edf51664c
   languageName: node
   linkType: hard
 
@@ -11536,7 +11536,7 @@
   version: 0.0.0-use.local
   resolution: "@langchain/anthropic@workspace:libs/langchain-anthropic"
   dependencies:
-    "@anthropic-ai/sdk": ^0.32.1
+    "@anthropic-ai/sdk": ^0.36.3
    "@anthropic-ai/vertex-sdk": ^0.4.1
    "@jest/globals": ^29.5.0
    "@langchain/core": "workspace:*"
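The notebook cells above print the raw `content` array, but they do not show how a caller would pick the citations back out of a response. The sketch below is not part of the diff; it is a minimal illustration that assumes the content-block shape shown in the notebook output above (text blocks carrying a `citations` array with `cited_text` and `document_title` fields) and mirrors the model and message format used in those examples.

```typescript
import { ChatAnthropic } from "@langchain/anthropic";

const model = new ChatAnthropic({ model: "claude-3-5-haiku-latest" });

const response = await model.invoke([
  {
    role: "user",
    content: [
      {
        type: "document",
        source: {
          type: "text",
          media_type: "text/plain",
          data: "The grass is green. The sky is blue.",
        },
        title: "My Document",
        citations: { enabled: true },
      },
      { type: "text", text: "What color is the grass?" },
    ],
  },
]);

// When citations are enabled, content comes back as an array of text blocks;
// cited blocks carry a `citations` array alongside their text (see the
// notebook output above). The `any[]` cast matches the pattern used in the
// integration test in this diff.
const blocks = Array.isArray(response.content) ? (response.content as any[]) : [];
for (const block of blocks) {
  if (block.type === "text" && Array.isArray(block.citations)) {
    for (const citation of block.citations) {
      console.log(`"${block.text}" <- cited: "${citation.cited_text}" (${citation.document_title})`);
    }
  }
}
```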