Skip to content

Commit

Permalink
Merge pull request #488 from WolframResearch/main
Browse files Browse the repository at this point in the history
Release v1.3.3
  • Loading branch information
rhennigan authored Dec 5, 2023
2 parents 242740a + e812774 commit b30a0c8
Show file tree
Hide file tree
Showing 10 changed files with 469 additions and 65 deletions.
2 changes: 1 addition & 1 deletion PacletInfo.wl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
PacletObject[ <|
"Name" -> "Wolfram/Chatbook",
"PublisherID" -> "Wolfram",
"Version" -> "1.3.2",
"Version" -> "1.3.3",
"WolframVersion" -> "13.3+",
"Description" -> "Wolfram Notebooks + LLMs",
"License" -> "MIT",
Expand Down
2 changes: 1 addition & 1 deletion Scripts/Common.wl
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ releaseID[ dir_ ] := FirstCase[
(* ::Subsection::Closed:: *)
(*releaseURL*)
releaseURL[ file_ ] := Enclose[
Enclose @ Module[ { pac, repo, ver },
Module[ { pac, repo, ver },
pac = PacletObject @ Flatten @ File @ file;
repo = ConfirmBy[ Environment[ "GITHUB_REPOSITORY" ], StringQ ];
ver = ConfirmBy[ pac[ "Version" ], StringQ ];
Expand Down
113 changes: 93 additions & 20 deletions Source/Chatbook/ChatMessages.wl
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ Wolfram`Chatbook`CellToChatMessage;
`constructMessages;
`expandMultimodalString;
`getTokenizer;
`getTokenizerName;
`resizeMultimodalImage;

Begin[ "`Private`" ];
Expand Down Expand Up @@ -70,6 +71,10 @@ $styleRoles = <|
"ChatSystemInput" -> "System"
|>;

(* Known tokenizer names. tokenizerName matches these as substrings of a given name, and cachedTokenizer[ All ]
   maps over this list: *)
$cachedTokenizerNames = { "chat-bison", "claude", "gpt-2", "gpt-3.5", "gpt-4-vision", "gpt-4" };
(* Association of tokenizer functions keyed by tokenizer name; entries are added by cacheTokenizer: *)
$cachedTokenizers = <| |>;
(* Name reported by getTokenizerName when a model does not resolve to a member of $cachedTokenizerNames: *)
$fallbackTokenizer = "gpt-2";

(* ::**************************************************************************************************************:: *)
(* ::Section::Closed:: *)
(*CellToChatMessage*)
Expand Down Expand Up @@ -1079,58 +1084,120 @@ argumentTokenToString // endDefinition;
(* ::**************************************************************************************************************:: *)
(* ::Section::Closed:: *)
(*Tokenization*)
$tokenizer := gpt2Tokenizer;
$tokenizer := $gpt2Tokenizer;

(* ::**************************************************************************************************************:: *)
(* ::Subsection::Closed:: *)
(*getTokenizerName*)
getTokenizerName // beginDefinition;

(* A tokenizer given by name (under either key) is normalized through tokenizerName: *)
getTokenizerName[ KeyValuePattern[ ( "TokenizerName" | "Tokenizer" ) -> explicit_String ] ] :=
    tokenizerName[ explicit ];

(* Any other specified (non-string) tokenizer is reported as a custom one: *)
getTokenizerName[ KeyValuePattern[ "Tokenizer" -> Except[ $$unspecified ] ] ] := "Custom";

(* Otherwise derive the name from the model; names that are not in the known list give the fallback name: *)
getTokenizerName[ KeyValuePattern[ "Model" -> model_ ] ] :=
    Module[ { resolved },
        resolved = tokenizerName @ toModelName @ model;
        If[ MemberQ[ $cachedTokenizerNames, resolved ], resolved, $fallbackTokenizer ]
    ];

getTokenizerName // endDefinition;

(* ::**************************************************************************************************************:: *)
(* ::Subsection::Closed:: *)
(*getTokenizer*)
getTokenizer // beginDefinition;
getTokenizer[ KeyValuePattern[ "Tokenizer" -> tokenizer: Except[ $$unspecified ] ] ] := tokenizer;
getTokenizer[ KeyValuePattern[ "Model" -> model_ ] ] := getTokenizer @ model;
getTokenizer[ model_ ] := cachedTokenizer @ toModelName @ model;
getTokenizer[ KeyValuePattern[ "TokenizerName" -> name_String ] ] := cachedTokenizer @ name;
getTokenizer[ KeyValuePattern[ "Model" -> model_ ] ] := cachedTokenizer @ toModelName @ model;
getTokenizer // endDefinition;

(* ::**************************************************************************************************************:: *)
(* ::Subsubsection::Closed:: *)
(*cachedTokenizer*)
cachedTokenizer // beginDefinition;
cachedTokenizer[ All ] := AssociationMap[ cachedTokenizer, $cachedTokenizerNames ];
cachedTokenizer[ name_String ] := cachedTokenizer0 @ tokenizerName @ toModelName @ name;

(* Apply cachedTokenizer to every name in $cachedTokenizerNames, giving an association keyed by name: *)
cachedTokenizer[ All ] :=
AssociationMap[ cachedTokenizer, $cachedTokenizerNames ];

(* Fast path: give the value stored in $cachedTokenizers for the normalized name. The condition fails when the
   looked-up value matches $$unspecified, so evaluation falls through to the next definition.
   NOTE(review): $$unspecified is defined outside this view; presumably it matches the Missing[...] value that a
   lookup of an absent key gives - confirm. *)
cachedTokenizer[ id_String ] :=
With[ { tokenizer = $cachedTokenizers[ tokenizerName @ toModelName @ id ] },
tokenizer /; ! MatchQ[ tokenizer, $$unspecified ]
];

(* Slow path: look up a tokenizer for the normalized name via findTokenizer and store it with cacheTokenizer.
   When findTokenizer gives a Missing[...] result, $gpt2Tokenizer is used instead.
   A failed confirmation is handed to throwInternalFailure by the enclosing Enclose. *)
cachedTokenizer[ id_String ] := Enclose[
Module[ { name, tokenizer },
name = ConfirmBy[ tokenizerName @ toModelName @ id, StringQ, "Name" ];
tokenizer = findTokenizer @ name;
If[ MissingQ @ tokenizer,
(* Fallback to the GPT-2 tokenizer: *)
tokenizer = ConfirmMatch[ $gpt2Tokenizer, Except[ $$unspecified ], "GPT2Tokenizer" ];
If[ TrueQ @ Wolfram`ChatbookInternal`$BuildingMX,
tokenizer, (* Avoid caching fallback values into MX definitions *)
cacheTokenizer[ name, tokenizer ]
],
cacheTokenizer[ name, ConfirmMatch[ tokenizer, Except[ $$unspecified ], "Tokenizer" ] ]
]
],
throwInternalFailure
];

cachedTokenizer // endDefinition;

(* ::**************************************************************************************************************:: *)
(* ::Subsubsubsection::Closed:: *)
(*cacheTokenizer*)
cacheTokenizer // beginDefinition;

cachedTokenizer0 // beginDefinition;
(* Adds the name to $cachedTokenizerNames, stores the tokenizer under it, and returns the stored tokenizer: *)
cacheTokenizer[ key_String, tok: Except[ $$unspecified ] ] :=
    CompoundExpression[
        $cachedTokenizerNames = Union[ $cachedTokenizerNames, { key } ],
        $cachedTokenizers[ key ] = tok
    ];

cachedTokenizer0[ "chat-bison" ] = ToCharacterCode[ #, "UTF8" ] &;
cacheTokenizer // endDefinition;

cachedTokenizer0[ "gpt-4-vision" ] :=
If[ graphicsQ[ # ],
gpt4ImageTokenizer[ # ],
cachedTokenizer[ "gpt-4" ][ # ]
] &;
(* ::**************************************************************************************************************:: *)
(* ::Subsubsubsection::Closed:: *)
(*findTokenizer*)
findTokenizer // beginDefinition;

cachedTokenizer0[ model_String ] := Enclose[
findTokenizer[ model_String ] := Enclose[
Quiet @ Module[ { name, tokenizer },
initTools[ ];
Quiet @ Needs[ "Wolfram`LLMFunctions`Utilities`Tokenization`" -> None ];
name = ConfirmBy[ tokens`FindTokenizer @ model, StringQ, "Name" ];
tokenizer = ConfirmMatch[ tokens`LLMTokenizer[ Method -> name ], Except[ _tokens`LLMTokenizer ], "Tokenizer" ];
ConfirmMatch[ tokenizer[ "test" ], _List, "TokenizerTest" ];
cachedTokenizer0[ model ] = tokenizer
tokenizer
],
gpt2Tokenizer &
Missing[ "NotFound" ] &
];

cachedTokenizer0 // endDefinition;
findTokenizer // endDefinition;

(* ::**************************************************************************************************************:: *)
(* ::Subsubsubsection::Closed:: *)
(*Pre-cached small tokenizer functions*)
(* chat-bison: the token list is the UTF-8 character codes of the input: *)
$cachedTokenizers[ "chat-bison" ] = Function[ ToCharacterCode[ #, "UTF8" ] ];

(* gpt-4-vision: graphics go to the image tokenizer, everything else to the "gpt-4" tokenizer. The "gpt-4" lookup
   sits inside the function body, so it only happens when the tokenizer is applied: *)
$cachedTokenizers[ "gpt-4-vision" ] = Function[
    If[ graphicsQ[ # ],
        gpt4ImageTokenizer[ # ],
        cachedTokenizer[ "gpt-4" ][ # ]
    ]
];

(* ::**************************************************************************************************************:: *)
(* ::Subsubsection::Closed:: *)
(*tokenizerName*)
tokenizerName // beginDefinition;
tokenizerName[ name_String ] := SelectFirst[ $cachedTokenizerNames, StringContainsQ[ name, # ] &, name ];
tokenizerName // endDefinition;

$cachedTokenizerNames = { "gpt-4-vision", "gpt-4", "gpt-3.5", "gpt-2", "claude-2", "claude-instant-1", "chat-bison" };
(* Gives the first known tokenizer name contained in the given name (ignoring case), trying longer known names
   before shorter ones. If none is contained, the given name is returned unchanged: *)
tokenizerName[ name_String ] :=
    With[ { longestFirst = ReverseSortBy[ $cachedTokenizerNames, StringLength ] },
        SelectFirst[
            longestFirst,
            Function[ known, StringContainsQ[ name, known, IgnoreCase -> True ] ],
            name
        ]
    ];

tokenizerName // endDefinition;

(* ::**************************************************************************************************************:: *)
(* ::Subsection::Closed:: *)
Expand Down Expand Up @@ -1182,13 +1249,19 @@ gpt4ImageTokenCount0 // endDefinition;
(* ::**************************************************************************************************************:: *)
(* ::Subsection::Closed:: *)
(*Fallback Tokenizer*)
gpt2Tokenizer := gpt2Tokenizer = ResourceFunction[ "GPTTokenizer" ][ ];
$gpt2Tokenizer := $gpt2Tokenizer = gpt2Tokenizer[ ];

(* https://resources.wolframcloud.com/FunctionRepository/resources/GPTTokenizer *)
importResourceFunction[ gpt2Tokenizer, "GPTTokenizer" ];

(* ::**************************************************************************************************************:: *)
(* ::Section::Closed:: *)
(*Package Footer*)
(* When building the MX file, evaluate the tokenizer definitions up front. TrueQ makes the check well-defined when
   the flag has no Boolean value (a bare If would otherwise be left unevaluated), and matches the check used in
   cachedTokenizer: *)
If[ TrueQ @ Wolfram`ChatbookInternal`$BuildingMX,
    (* Evaluate cachedTokenizer for every known tokenizer name: *)
    cachedTokenizer[ All ];
    (* Evaluate $gpt2Tokenizer once so that its value is set: *)
    $gpt2Tokenizer;
    (* This is only needed to generate $gpt2Tokenizer once, so it can be removed to reduce MX file size: *)
    Remove[ "Wolfram`Chatbook`ResourceFunctions`GPTTokenizer`GPTTokenizer" ];
];

(* :!CodeAnalysis::EndBlock:: *)
Expand Down
51 changes: 50 additions & 1 deletion Source/Chatbook/Formatting.wl
Original file line number Diff line number Diff line change
Expand Up @@ -1137,11 +1137,12 @@ inlineInteractiveCodeCell // beginDefinition;

inlineInteractiveCodeCell[ display_, string_ ] /; $dynamicText := display;

(* TODO: make this switch dynamically depending on $cloudNotebooks (likely as a TemplateBox)*)
inlineInteractiveCodeCell[ display_, string_ ] :=
inlineInteractiveCodeCell[ display, string, contentLanguage @ string ];

inlineInteractiveCodeCell[ display_, string_, lang_ ] /; $cloudNotebooks :=
Mouseover[ display, Column @ { display, floatingButtonGrid[ string, lang ] } ];
cloudInlineInteractiveCodeCell[ display, string, lang ];

inlineInteractiveCodeCell[ display_, string_, lang_ ] :=
DynamicModule[ { $CellContext`attached, $CellContext`cell },
Expand All @@ -1167,6 +1168,54 @@ inlineInteractiveCodeCell[ display_, string_, lang_ ] :=

inlineInteractiveCodeCell // endDefinition;

(* ::**************************************************************************************************************:: *)
(* ::Subsubsection::Closed:: *)
(*cloudInlineInteractiveCodeCell*)
cloudInlineInteractiveCodeCell // beginDefinition;

(* Builds a Mouseover whose default view overlays an invisible copy of the button grid on the padded display, and
   whose hover view overlays the visible button grid on the same padded display: *)
cloudInlineInteractiveCodeCell[ display_, string_, lang_ ] :=
    With[
        {
            (* The display wrapped in a pane with minimum width 100 and minimum height 30: *)
            content = Pane[ display, ImageSize -> { { 100, Automatic }, { 30, Automatic } } ],

            (* The button grid inside a white rounded frame: *)
            toolbar = Framed[
                floatingButtonGrid[ string, lang ],
                Background     -> White,
                FrameMargins   -> { { 1, 0 }, { 0, 1 } },
                FrameStyle     -> White,
                ImageMargins   -> 1,
                RoundingRadius -> 3
            ]
        },
        Mouseover[
            buttonOverlay[ content, Invisible @ toolbar ],
            buttonOverlay[ content, toolbar ],
            ContentPadding -> False,
            FrameMargins   -> 0,
            ImageMargins   -> 0,
            ImageSize      -> All
        ]
    ];

cloudInlineInteractiveCodeCell // endDefinition;

(* ::**************************************************************************************************************:: *)
(* ::Subsubsection::Closed:: *)
(*buttonOverlay*)
buttonOverlay // beginDefinition;

(* Overlays the second argument on the first, aligned to the bottom left. Both layers are displayed (All) and the
   second layer is the one that receives interaction (2): *)
buttonOverlay[ base_, top_ ] :=
    Overlay[
        { base, top },
        All,
        2,
        Alignment      -> { Left, Bottom },
        ContentPadding -> False,
        FrameMargins   -> 0,
        ImageMargins   -> 0
    ];

buttonOverlay // endDefinition;

(* ::**************************************************************************************************************:: *)
(* ::Subsection::Closed:: *)
(*makeInlineCodeCell*)
Expand Down
1 change: 1 addition & 0 deletions Source/Chatbook/Main.wl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ BeginPackage[ "Wolfram`Chatbook`" ];
(* ::**************************************************************************************************************:: *)
(* ::Subsection::Closed:: *)
(*Declare Symbols*)
`$AvailableTools;
`$ChatHandlerData;
`$ChatPost;
`$ChatPre;
Expand Down
22 changes: 22 additions & 0 deletions Source/Chatbook/Prompting.wl
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ $basePromptOrder = {
"Checkboxes",
"CheckboxesIndeterminate",
"ConversionFormatting",
"SpecialURI",
"SpecialURIAudio",
"SpecialURIVideo",
"SpecialURIDynamic",
"VisibleUserInput",
"TrivialCode",
"Packages",
Expand All @@ -51,6 +55,7 @@ $basePromptClasses = <|
"Math" -> { "MathExpressions" },
"Formatting" -> { "CodeBlocks", "MathExpressions", "EscapedCharacters" },
"MessageConversion" -> { "ConversionLargeOutputs", "ConversionGraphics", "ConversionFormatting" },
"SpecialURIs" -> { "SpecialURIAudio", "SpecialURIVideo", "SpecialURIDynamic" },
"All" -> $basePromptOrder
|>;

Expand All @@ -71,6 +76,10 @@ $basePromptDependencies = Append[ "GeneralInstructionsHeader" ] /@ <|
"ConversionGraphics" -> { "MessageConversionHeader" },
"MarkdownImageBox" -> { "MessageConversionHeader" },
"ConversionFormatting" -> { "MessageConversionHeader" },
"SpecialURI" -> { },
"SpecialURIAudio" -> { "SpecialURI" },
"SpecialURIVideo" -> { "SpecialURI" },
"SpecialURIDynamic" -> { "SpecialURI" },
"VisibleUserInput" -> { },
"TrivialCode" -> { },
"WolframSymbolCapitalization" -> { },
Expand Down Expand Up @@ -159,6 +168,19 @@ $basePromptComponents[ "ConversionFormatting" ] = "\
``Cell[TextData[{StyleBox[\"Styled\", FontSlant -> \"Italic\"], \" message\"}], \"ChatInput\"]`` \
becomes ``Styled message``.";

(* General note about markdown links that use special URI schemes. $basePromptDependencies lists this component as
   a dependency of the three scheme-specific components below: *)
$basePromptComponents[ "SpecialURI" ] = "\
* You will occasionally see markdown links with special URI schemes, e.g. ![label](scheme://content-id) that represent \
interactive interface elements. You can use these in your responses to display the same elements to the user.";

(* Describes the audio:// scheme: *)
$basePromptComponents[ "SpecialURIAudio" ] = "\
* ![label](audio://content-id) represents an interactive audio player.";

(* Describes the video:// scheme: *)
$basePromptComponents[ "SpecialURIVideo" ] = "\
* ![label](video://content-id) represents an interactive video player.";

(* Describes the dynamic:// scheme: *)
$basePromptComponents[ "SpecialURIDynamic" ] = "\
* ![label](dynamic://content-id) represents an embedded dynamic UI.";

$basePromptComponents[ "VisibleUserInput" ] = "\
* The user can still see their input, so there's no need to repeat it in your response";

Expand Down
Loading

0 comments on commit b30a0c8

Please sign in to comment.