Skip to content

Commit

Permalink
Lay some groundwork for language plugins (#3837)
Browse files Browse the repository at this point in the history
This PR adds undocumented functionality for loading custom language
plugins at runtime. I don't intend to expose the functionality to end
users yet, but this will allow the team to test the capability
internally.

### Implementation

There isn't much new code in Zed. Most of the work here is within
Tree-sitter, in PRs tree-sitter/tree-sitter#1864
and tree-sitter/tree-sitter#2840, which allow
Tree-sitter to load languages from WASM blobs. I've tested the
functionality in Tree-sitter's test suite and via its CLI, but having it
wired into Zed allows us to test the functionality more fully.

### Details

Now, on startup, Zed will look for subdirectories inside of
`~/Application Support/plugins`. These subdirectories are expected to
look similar to the per-language subdirectories in
[`crates/zed2/src/languages`](https://github.com/zed-industries/zed/tree/main/crates/zed2/src/languages),
except that they also contain a `.wasm` file for the parser itself.

I'll add more details here as I go.
  • Loading branch information
maxbrunsfeld authored Jan 3, 2024
2 parents 5f84172 + 64e5122 commit b594e59
Show file tree
Hide file tree
Showing 12 changed files with 518 additions and 95 deletions.
436 changes: 393 additions & 43 deletions Cargo.lock

Large diffs are not rendered by default.

7 changes: 4 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,6 @@ members = [
"crates/picker2",
"crates/plugin",
"crates/plugin_macros",
"crates/plugin_runtime",
"crates/prettier",
"crates/prettier2",
"crates/project",
Expand Down Expand Up @@ -176,11 +175,12 @@ thiserror = { version = "1.0.29" }
time = { version = "0.3", features = ["serde", "serde-well-known"] }
toml = { version = "0.5" }
tiktoken-rs = "0.5.7"
tree-sitter = "0.20"
tree-sitter = { version = "0.20", features = ["wasm"] }
unindent = { version = "0.1.7" }
pretty_assertions = "1.3.0"
git2 = { version = "0.15", default-features = false}
uuid = { version = "1.1.2", features = ["v4"] }
wasmtime = "16"

tree-sitter-bash = { git = "https://github.com/tree-sitter/tree-sitter-bash", rev = "7331995b19b8f8aba2d5e26deb51d2195c18bc94" }
tree-sitter-c = "0.20.1"
Expand Down Expand Up @@ -212,8 +212,9 @@ tree-sitter-vue = {git = "https://github.com/zed-industries/tree-sitter-vue", re
tree-sitter-uiua = {git = "https://github.com/shnarazk/tree-sitter-uiua", rev = "9260f11be5900beda4ee6d1a24ab8ddfaf5a19b2"}

[patch.crates-io]
tree-sitter = { git = "https://github.com/tree-sitter/tree-sitter", rev = "b5f461a69bf3df7298b1903574d506179e6390b0" }
tree-sitter = { git = "https://github.com/tree-sitter/tree-sitter", rev = "31c40449749c4263a91a43593831b82229049a4c" }
async-task = { git = "https://github.com/zed-industries/async-task", rev = "341b57d6de98cdfd7b418567b8de2022ca993a6e" }
wasmtime = { git = "https://github.com/bytecodealliance/wasmtime", rev = "v16.0.0" }

# TODO - Remove when a version is released with this PR: https://github.com/servo/core-foundation-rs/pull/457
cocoa = { git = "https://github.com/servo/core-foundation-rs", rev = "079665882507dd5e2ff77db3de5070c1f6c0fb85" }
Expand Down
18 changes: 9 additions & 9 deletions crates/language/src/language.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1169,7 +1169,7 @@ impl Language {
indents_config: None,
injection_config: None,
override_config: None,
error_query: Query::new(ts_language, "(ERROR) @error").unwrap(),
error_query: Query::new(&ts_language, "(ERROR) @error").unwrap(),
ts_language,
highlight_map: Default::default(),
})
Expand Down Expand Up @@ -1230,13 +1230,13 @@ impl Language {

pub fn with_highlights_query(mut self, source: &str) -> Result<Self> {
let grammar = self.grammar_mut();
grammar.highlights_query = Some(Query::new(grammar.ts_language, source)?);
grammar.highlights_query = Some(Query::new(&grammar.ts_language, source)?);
Ok(self)
}

pub fn with_outline_query(mut self, source: &str) -> Result<Self> {
let grammar = self.grammar_mut();
let query = Query::new(grammar.ts_language, source)?;
let query = Query::new(&grammar.ts_language, source)?;
let mut item_capture_ix = None;
let mut name_capture_ix = None;
let mut context_capture_ix = None;
Expand Down Expand Up @@ -1264,7 +1264,7 @@ impl Language {

pub fn with_embedding_query(mut self, source: &str) -> Result<Self> {
let grammar = self.grammar_mut();
let query = Query::new(grammar.ts_language, source)?;
let query = Query::new(&grammar.ts_language, source)?;
let mut item_capture_ix = None;
let mut name_capture_ix = None;
let mut context_capture_ix = None;
Expand Down Expand Up @@ -1295,7 +1295,7 @@ impl Language {

pub fn with_brackets_query(mut self, source: &str) -> Result<Self> {
let grammar = self.grammar_mut();
let query = Query::new(grammar.ts_language, source)?;
let query = Query::new(&grammar.ts_language, source)?;
let mut open_capture_ix = None;
let mut close_capture_ix = None;
get_capture_indices(
Expand All @@ -1317,7 +1317,7 @@ impl Language {

pub fn with_indents_query(mut self, source: &str) -> Result<Self> {
let grammar = self.grammar_mut();
let query = Query::new(grammar.ts_language, source)?;
let query = Query::new(&grammar.ts_language, source)?;
let mut indent_capture_ix = None;
let mut start_capture_ix = None;
let mut end_capture_ix = None;
Expand Down Expand Up @@ -1345,7 +1345,7 @@ impl Language {

pub fn with_injection_query(mut self, source: &str) -> Result<Self> {
let grammar = self.grammar_mut();
let query = Query::new(grammar.ts_language, source)?;
let query = Query::new(&grammar.ts_language, source)?;
let mut language_capture_ix = None;
let mut content_capture_ix = None;
get_capture_indices(
Expand Down Expand Up @@ -1384,7 +1384,7 @@ impl Language {
}

pub fn with_override_query(mut self, source: &str) -> anyhow::Result<Self> {
let query = Query::new(self.grammar_mut().ts_language, source)?;
let query = Query::new(&self.grammar_mut().ts_language, source)?;

let mut override_configs_by_id = HashMap::default();
for (ix, name) in query.capture_names().iter().copied().enumerate() {
Expand Down Expand Up @@ -1695,7 +1695,7 @@ impl Grammar {
PARSER.with(|parser| {
let mut parser = parser.borrow_mut();
parser
.set_language(self.ts_language)
.set_language(&self.ts_language)
.expect("incompatible grammar");
let mut chunks = text.chunks_in_range(0..text.len());
parser
Expand Down
2 changes: 1 addition & 1 deletion crates/language/src/syntax_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1166,7 +1166,7 @@ fn parse_text(
.set_included_ranges(&ranges)
.expect("overlapping ranges");
parser
.set_language(grammar.ts_language)
.set_language(&grammar.ts_language)
.expect("incompatible grammar");
parser
.parse_with(
Expand Down
109 changes: 85 additions & 24 deletions crates/language2/src/language2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ use std::{
};
use syntax_map::SyntaxSnapshot;
use theme::{SyntaxTheme, Theme};
use tree_sitter::{self, Query};
use tree_sitter::{self, wasmtime, Query, WasmStore};
use unicase::UniCase;
use util::{http::HttpClient, paths::PathExt};
use util::{post_inc, ResultExt, TryFutureExt as _, UnwrapFuture};
Expand Down Expand Up @@ -84,10 +84,15 @@ impl LspBinaryStatusSender {
}

thread_local! {
static PARSER: RefCell<Parser> = RefCell::new(Parser::new());
static PARSER: RefCell<Parser> = {
let mut parser = Parser::new();
parser.set_wasm_store(WasmStore::new(WASM_ENGINE.clone()).unwrap()).unwrap();
RefCell::new(parser)
};
}

lazy_static! {
pub static ref WASM_ENGINE: wasmtime::Engine = wasmtime::Engine::default();
pub static ref NEXT_GRAMMAR_ID: AtomicUsize = Default::default();
pub static ref PLAIN_TEXT: Arc<Language> = Arc::new(Language::new(
LanguageConfig {
Expand Down Expand Up @@ -360,6 +365,7 @@ pub struct CodeLabel {
#[derive(Clone, Deserialize)]
pub struct LanguageConfig {
pub name: Arc<str>,
pub grammar_name: Option<Arc<str>>,
pub path_suffixes: Vec<String>,
pub brackets: BracketPairConfig,
#[serde(default, deserialize_with = "deserialize_regex")]
Expand Down Expand Up @@ -446,6 +452,7 @@ impl Default for LanguageConfig {
fn default() -> Self {
Self {
name: "".into(),
grammar_name: None,
path_suffixes: Default::default(),
brackets: Default::default(),
auto_indent_using_last_non_empty_line: auto_indent_using_last_non_empty_line_default(),
Expand Down Expand Up @@ -620,14 +627,25 @@ type AvailableLanguageId = usize;
#[derive(Clone)]
struct AvailableLanguage {
id: AvailableLanguageId,
path: &'static str,
config: LanguageConfig,
grammar: tree_sitter::Language,
grammar: AvailableGrammar,
lsp_adapters: Vec<Arc<dyn LspAdapter>>,
get_queries: fn(&str) -> LanguageQueries,
loaded: bool,
}

#[derive(Clone)]
enum AvailableGrammar {
Native {
grammar: tree_sitter::Language,
asset_dir: &'static str,
get_queries: fn(&str) -> LanguageQueries,
},
Wasm {
grammar_name: Arc<str>,
path: Arc<Path>,
},
}

pub struct LanguageRegistry {
state: RwLock<LanguageRegistryState>,
language_server_download_dir: Option<Arc<Path>>,
Expand Down Expand Up @@ -699,7 +717,7 @@ impl LanguageRegistry {

pub fn register(
&self,
path: &'static str,
asset_dir: &'static str,
config: LanguageConfig,
grammar: tree_sitter::Language,
lsp_adapters: Vec<Arc<dyn LspAdapter>>,
Expand All @@ -708,11 +726,24 @@ impl LanguageRegistry {
let state = &mut *self.state.write();
state.available_languages.push(AvailableLanguage {
id: post_inc(&mut state.next_available_language_id),
path,
config,
grammar,
grammar: AvailableGrammar::Native {
grammar,
get_queries,
asset_dir,
},
lsp_adapters,
get_queries,
loaded: false,
});
}

pub fn register_wasm(&self, path: Arc<Path>, grammar_name: Arc<str>, config: LanguageConfig) {
let state = &mut *self.state.write();
state.available_languages.push(AvailableLanguage {
id: post_inc(&mut state.next_available_language_id),
config,
grammar: AvailableGrammar::Wasm { grammar_name, path },
lsp_adapters: Vec::new(),
loaded: false,
});
}
Expand Down Expand Up @@ -837,13 +868,43 @@ impl LanguageRegistry {
executor
.spawn(async move {
let id = language.id;
let queries = (language.get_queries)(&language.path);
let language =
Language::new(language.config, Some(language.grammar))
let name = language.config.name.clone();
let language = async {
let (grammar, queries) = match language.grammar {
AvailableGrammar::Native {
grammar,
asset_dir,
get_queries,
} => (grammar, (get_queries)(asset_dir)),
AvailableGrammar::Wasm { grammar_name, path } => {
let mut wasm_path = path.join(grammar_name.as_ref());
wasm_path.set_extension("wasm");
let wasm_bytes = std::fs::read(&wasm_path)?;
let grammar = PARSER.with(|parser| {
let mut parser = parser.borrow_mut();
let mut store = parser.take_wasm_store().unwrap();
let grammar =
store.load_language(&grammar_name, &wasm_bytes);
parser.set_wasm_store(store).unwrap();
grammar
})?;
let mut queries = LanguageQueries::default();
if let Ok(contents) = std::fs::read_to_string(
&path.join("highlights.scm"),
) {
queries.highlights = Some(contents.into());
}
(grammar, queries)
}
};
Language::new(language.config, Some(grammar))
.with_lsp_adapters(language.lsp_adapters)
.await;
let name = language.name();
match language.with_queries(queries) {
.await
.with_queries(queries)
}
.await;

match language {
Ok(language) => {
let language = Arc::new(language);
let mut state = this.state.write();
Expand Down Expand Up @@ -1175,7 +1236,7 @@ impl Language {
indents_config: None,
injection_config: None,
override_config: None,
error_query: Query::new(ts_language, "(ERROR) @error").unwrap(),
error_query: Query::new(&ts_language, "(ERROR) @error").unwrap(),
ts_language,
highlight_map: Default::default(),
})
Expand Down Expand Up @@ -1236,13 +1297,13 @@ impl Language {

pub fn with_highlights_query(mut self, source: &str) -> Result<Self> {
let grammar = self.grammar_mut();
grammar.highlights_query = Some(Query::new(grammar.ts_language, source)?);
grammar.highlights_query = Some(Query::new(&grammar.ts_language, source)?);
Ok(self)
}

pub fn with_outline_query(mut self, source: &str) -> Result<Self> {
let grammar = self.grammar_mut();
let query = Query::new(grammar.ts_language, source)?;
let query = Query::new(&grammar.ts_language, source)?;
let mut item_capture_ix = None;
let mut name_capture_ix = None;
let mut context_capture_ix = None;
Expand Down Expand Up @@ -1270,7 +1331,7 @@ impl Language {

pub fn with_embedding_query(mut self, source: &str) -> Result<Self> {
let grammar = self.grammar_mut();
let query = Query::new(grammar.ts_language, source)?;
let query = Query::new(&grammar.ts_language, source)?;
let mut item_capture_ix = None;
let mut name_capture_ix = None;
let mut context_capture_ix = None;
Expand Down Expand Up @@ -1301,7 +1362,7 @@ impl Language {

pub fn with_brackets_query(mut self, source: &str) -> Result<Self> {
let grammar = self.grammar_mut();
let query = Query::new(grammar.ts_language, source)?;
let query = Query::new(&grammar.ts_language, source)?;
let mut open_capture_ix = None;
let mut close_capture_ix = None;
get_capture_indices(
Expand All @@ -1323,7 +1384,7 @@ impl Language {

pub fn with_indents_query(mut self, source: &str) -> Result<Self> {
let grammar = self.grammar_mut();
let query = Query::new(grammar.ts_language, source)?;
let query = Query::new(&grammar.ts_language, source)?;
let mut indent_capture_ix = None;
let mut start_capture_ix = None;
let mut end_capture_ix = None;
Expand Down Expand Up @@ -1351,7 +1412,7 @@ impl Language {

pub fn with_injection_query(mut self, source: &str) -> Result<Self> {
let grammar = self.grammar_mut();
let query = Query::new(grammar.ts_language, source)?;
let query = Query::new(&grammar.ts_language, source)?;
let mut language_capture_ix = None;
let mut content_capture_ix = None;
get_capture_indices(
Expand Down Expand Up @@ -1390,7 +1451,7 @@ impl Language {
}

pub fn with_override_query(mut self, source: &str) -> anyhow::Result<Self> {
let query = Query::new(self.grammar_mut().ts_language, source)?;
let query = Query::new(&self.grammar_mut().ts_language, source)?;

let mut override_configs_by_id = HashMap::default();
for (ix, name) in query.capture_names().iter().enumerate() {
Expand Down Expand Up @@ -1701,7 +1762,7 @@ impl Grammar {
PARSER.with(|parser| {
let mut parser = parser.borrow_mut();
parser
.set_language(self.ts_language)
.set_language(&self.ts_language)
.expect("incompatible grammar");
let mut chunks = text.chunks_in_range(0..text.len());
parser
Expand Down
11 changes: 3 additions & 8 deletions crates/language2/src/syntax_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ use futures::FutureExt;
use parking_lot::Mutex;
use std::{
borrow::Cow,
cell::RefCell,
cmp::{self, Ordering, Reverse},
collections::BinaryHeap,
fmt, iter,
Expand All @@ -16,13 +15,9 @@ use std::{
};
use sum_tree::{Bias, SeekTarget, SumTree};
use text::{Anchor, BufferSnapshot, OffsetRangeExt, Point, Rope, ToOffset, ToPoint};
use tree_sitter::{
Node, Parser, Query, QueryCapture, QueryCaptures, QueryCursor, QueryMatches, Tree,
};
use tree_sitter::{Node, Query, QueryCapture, QueryCaptures, QueryCursor, QueryMatches, Tree};

thread_local! {
static PARSER: RefCell<Parser> = RefCell::new(Parser::new());
}
use super::PARSER;

static QUERY_CURSORS: Mutex<Vec<QueryCursor>> = Mutex::new(vec![]);

Expand Down Expand Up @@ -1166,7 +1161,7 @@ fn parse_text(
.set_included_ranges(&ranges)
.expect("overlapping ranges");
parser
.set_language(grammar.ts_language)
.set_language(&grammar.ts_language)
.expect("incompatible grammar");
parser
.parse_with(
Expand Down
2 changes: 1 addition & 1 deletion crates/semantic_index/src/parsing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ impl CodeContextRetriever {
.embedding_config
.as_ref()
.ok_or_else(|| anyhow!("no embedding queries"))?;
self.parser.set_language(grammar.ts_language).unwrap();
self.parser.set_language(&grammar.ts_language).unwrap();

let tree = self
.parser
Expand Down
Loading

0 comments on commit b594e59

Please sign in to comment.