diff --git a/core/src/cache.rs b/core/src/cache.rs index dc6b7a47a..6638bb54f 100644 --- a/core/src/cache.rs +++ b/core/src/cache.rs @@ -1,5 +1,11 @@ //! Source cache. -use ast_cache::AstCache; +pub use ast_cache::AstCache; + +//TODO: (RFC007 migration) +// +// - [ ] Implement the AstResolver trait +// - [ ] Handle cyclic imports in the new resolver +// - [ ] Clean self.sources.files.stdlib_modules(): maybe make a wrapper in the source cache? use crate::{ bytecode::ast::{compat::ToMainline, Ast, AstAlloc}, @@ -80,25 +86,901 @@ impl InputFormat { }) } - pub fn to_tag(&self) -> &'static str { - match self { - InputFormat::Nickel => "Nickel", - InputFormat::Json => "Json", - InputFormat::Yaml => "Yaml", - InputFormat::Toml => "Toml", - InputFormat::Text => "Text", - #[cfg(feature = "nix-experimental")] - InputFormat::Nix => "Nix", + pub fn to_tag(&self) -> &'static str { + match self { + InputFormat::Nickel => "Nickel", + InputFormat::Json => "Json", + InputFormat::Yaml => "Yaml", + InputFormat::Toml => "Toml", + InputFormat::Text => "Text", + #[cfg(feature = "nix-experimental")] + InputFormat::Nix => "Nix", + } + } + + /// Extracts format embedded in SourcePath + pub fn from_source_path(source_path: &SourcePath) -> Option { + if let SourcePath::Path(_p, fmt) = source_path { + Some(*fmt) + } else { + None + } + } +} + +#[derive(Debug, Clone)] +pub struct TermCache { + /// The table storing parsed terms corresponding to the entries of the file database. + terms: HashMap, +} + +impl TermCache { + pub fn new() -> Self { + TermCache { + terms: HashMap::new(), + } + } + + /// Update the state of an entry. Return the previous state. + pub fn update_state(&mut self, file_id: FileId, new: EntryState) -> Option { + self.terms + .get_mut(&file_id) + .map(|TermEntry { state, .. }| std::mem::replace(state, new)) + } + + fn transform( + &mut self, + wildcards: &WildcardsCache, + import_data: &ImportData, + file_id: FileId, + ) -> Result, CacheError> { + match self.terms.get(&file_id).map(|entry| entry.state) { + Some(state) if state >= EntryState::Transformed => Ok(CacheOp::Cached(())), + Some(state) if state >= EntryState::Parsed => { + if state < EntryState::Transforming { + let cached_term = self.terms.remove(&file_id).unwrap(); + let term = + transform::transform(cached_term.term, wildcards.wildcards.get(&file_id))?; + self.terms.insert( + file_id, + TermEntry { + term, + state: EntryState::Transforming, + ..cached_term + }, + ); + + if let Some(imports) = import_data.imports.get(&file_id).cloned() { + for file_id in imports.into_iter() { + self.transform(wildcards, import_data, file_id)?; + } + } + + self.update_state(file_id, EntryState::Transformed); + } + Ok(CacheOp::Done(())) + } + _ => Err(CacheError::NotParsed), + } + } + + /// Apply program transformations to all the fields of a record. + /// + /// Used to transform stdlib modules and other records loaded in the environment, when using + /// e.g. the `load` command of the REPL. If one just uses [Self::transform], the share normal + /// form transformation would add let bindings to a record entry `{ ... }`, turning it into + /// `let %0 = ... in ... in { ... }`. But stdlib entries are required to be syntactically + /// records. + /// + /// Note that this requirement may be relaxed in the future by e.g. evaluating stdlib entries + /// before adding their fields to the initial environment. + /// + /// # Preconditions + /// + /// - the entry must syntactically be a record (`Record` or `RecRecord`). 
Otherwise, this + /// function panics + pub fn transform_inner( + &mut self, + wildcards: &WildcardsCache, + import_data: &ImportData, + file_id: FileId, + ) -> Result, CacheError> { + match self.entry_state(file_id) { + Some(state) if state >= EntryState::Transformed => Ok(CacheOp::Cached(())), + Some(_) => { + let TermEntry { + mut term, + state, + parse_errs, + } = self.terms.remove(&file_id).unwrap(); + + let wildcards = wildcards.get(&file_id); + + if state < EntryState::Transforming { + match SharedTerm::make_mut(&mut term.term) { + Term::Record(RecordData { ref mut fields, .. }) => { + let map_res: Result<_, UnboundTypeVariableError> = + std::mem::take(fields) + .into_iter() + .map(|(id, field)| { + Ok(( + id, + field.try_map_value(|v| { + transform::transform(v, wildcards) + })?, + )) + }) + .collect(); + *fields = map_res.map_err(CacheError::Error)?; + } + Term::RecRecord(ref mut record, ref mut dyn_fields, ..) => { + let map_res: Result<_, UnboundTypeVariableError> = + std::mem::take(&mut record.fields) + .into_iter() + .map(|(id, field)| { + Ok(( + id, + field.try_map_value(|v| { + transform::transform(v, wildcards) + })?, + )) + }) + .collect(); + + let dyn_fields_res: Result<_, UnboundTypeVariableError> = + std::mem::take(dyn_fields) + .into_iter() + .map(|(id_t, mut field)| { + let value = field + .value + .take() + .map(|v| transform::transform(v, wildcards)) + .transpose()?; + + Ok(( + transform::transform(id_t, wildcards)?, + Field { value, ..field }, + )) + }) + .collect(); + + record.fields = map_res.map_err(CacheError::Error)?; + *dyn_fields = dyn_fields_res.map_err(CacheError::Error)?; + } + _ => panic!("cache::transform_inner(): not a record"), + } + + self.terms.insert( + file_id, + TermEntry { + term, + state: EntryState::Transforming, + parse_errs, + }, + ); + + if let Some(imports) = import_data.imports.get(&file_id).cloned() { + for file_id in imports.into_iter() { + self.transform(wildcards, import_data, file_id) + .map_err(|_| CacheError::NotParsed)?; + } + } + self.update_state(file_id, EntryState::Transformed); + } + + Ok(CacheOp::Done(())) + } + None => Err(CacheError::NotParsed), + } + } + + /// Retrieve the state of an entry. Return `None` if the entry is not in the term cache, + /// meaning that the content of the source has been loaded but has not been parsed yet. + pub fn entry_state(&self, file_id: FileId) -> Option { + self.terms + .get(&file_id) + .map(|TermEntry { state, .. }| state) + .copied() + } +} + +pub struct SourceCache { + /// The content of the program sources plus imports. + files: Files, + /// Reverse map from file ids to source paths. + file_paths: HashMap, + /// The name-id table, holding file ids stored in the database indexed by source names. + file_ids: HashMap, + /// Paths where to look for imports, as included by the user through either the CLI argument + /// `--import-path` or the environment variable `$NICKEL_IMPORT_PATH`. + import_paths: Vec, + /// A table mapping FileIds to the package that they belong to. + /// + /// Path dependencies have already been canonicalized to absolute paths. + packages: HashMap, + /// The map used to resolve package imports. + package_map: Option, +} + +impl SourceCache { + pub fn new() -> Self { + SourceCache { + files: Files::new(), + file_paths: HashMap::new(), + file_ids: HashMap::new(), + import_paths: Vec::new(), + packages: HashMap::new(), + package_map: None, + } + } + + pub fn add_import_paths
<P>(&mut self, paths: impl Iterator<Item = P>) + where + PathBuf: From<P>
, + { + self.import_paths.extend(paths.map(PathBuf::from)); + } + + /// Sets the package map to use for package import resolution. + pub fn set_package_map(&mut self, map: PackageMap) { + self.package_map = Some(map); + } + + /// Same as [Self::add_file], but assume that the path is already normalized, and take the + /// timestamp as a parameter. + fn add_normalized_file( + &mut self, + path: PathBuf, + format: InputFormat, + timestamp: SystemTime, + ) -> io::Result { + let contents = std::fs::read_to_string(&path)?; + let file_id = self.files.add(&path, contents); + self.file_paths + .insert(file_id, SourcePath::Path(path.clone(), format)); + self.file_ids.insert( + SourcePath::Path(path, format), + NameIdEntry { + id: file_id, + source: SourceKind::Filesystem(timestamp), + }, + ); + Ok(file_id) + } + + /// Load a file from the filesystem and add it to the name-id table. + /// + /// Uses the normalized path and the *modified at* timestamp as the name-id table entry. + /// Overrides any existing entry with the same name. + pub fn add_file( + &mut self, + path: impl Into, + format: InputFormat, + ) -> io::Result { + let path = path.into(); + let timestamp = timestamp(&path)?; + let normalized = normalize_path(&path)?; + self.add_normalized_file(normalized, format, timestamp) + } + + /// Try to retrieve the id of a file from the cache. + /// + /// If it was not in cache, try to read it from the filesystem and add it as a new entry. + pub fn get_or_add_file( + &mut self, + path: impl Into, + format: InputFormat, + ) -> io::Result> { + let path = path.into(); + let normalized = normalize_path(&path)?; + match self.id_or_new_timestamp_of(path.as_ref(), format)? { + SourceState::UpToDate(id) => Ok(CacheOp::Cached(id)), + SourceState::Stale(timestamp) => self + .add_normalized_file(normalized, format, timestamp) + .map(CacheOp::Done), + } + } + + /// Load a source and add it to the name-id table. + /// + /// Do not check if a source with the same name already exists: if it is the + /// case, this one will override the old entry in the name-id table. + pub fn add_source(&mut self, source_name: SourcePath, mut source: T) -> io::Result + where + T: Read, + { + let mut buffer = String::new(); + source.read_to_string(&mut buffer)?; + Ok(self.add_string(source_name, buffer)) + } + + /// Returns the source code of a file. + /// + /// Panics if the file id is invalid. + pub fn source(&self, id: FileId) -> &str { + self.files.source(id) + } + + /// Load a new source as a string and add it to the name-id table. + /// + /// Do not check if a source with the same name already exists: if it is the case, this one + /// will override the old entry in the name-id table but the old `FileId` will remain valid. + pub fn add_string(&mut self, source_name: SourcePath, s: String) -> FileId { + let id = self.files.add(source_name.clone(), s); + self.file_paths.insert(id, source_name.clone()); + self.file_ids.insert( + source_name, + NameIdEntry { + id, + source: SourceKind::Memory, + }, + ); + id + } + + /// Load a new source as a string, replacing any existing source with the same name. + /// + /// If there was a previous source with the same name, its `FileId` is reused and the + /// cached term is deleted. + /// + /// Used to store intermediate short-lived generated snippets that needs to have a + /// corresponding `FileId`, such as when querying or reporting errors. 
+ pub fn replace_string( + &mut self, + asts: &mut AstCache, + terms: &mut TermCache, + source_name: SourcePath, + s: String, + ) -> FileId { + if let Some(file_id) = self.id_of(&source_name) { + self.files.update(file_id, s); + asts.remove(file_id); + terms.terms.remove(&file_id); + file_id + } else { + let file_id = self.files.add(source_name.clone(), s); + self.file_paths.insert(file_id, source_name.clone()); + self.file_ids.insert( + source_name, + NameIdEntry { + id: file_id, + source: SourceKind::Memory, + }, + ); + file_id + } + } + + /// Retrieve the id of a source given a name. + /// + /// Note that files added via [Self::add_file] are indexed by their full normalized path (cf + /// [normalize_path]). + pub fn id_of(&self, name: &SourcePath) -> Option { + match name { + SourcePath::Path(p, fmt) => match self.id_or_new_timestamp_of(p, *fmt).ok()? { + SourceState::UpToDate(id) => Some(id), + SourceState::Stale(_) => None, + }, + name => Some(self.file_ids.get(name)?.id), + } + } + + /// Try to retrieve the id of a cached source. + /// + /// Only returns `Ok` if the source is up-to-date; if the source is stale, returns + /// either the new timestamp of the up-to-date file or the error we encountered when + /// trying to read it (which most likely means there was no such file). + /// + /// The main point of this awkward signature is to minimize I/O operations: if we accessed + /// the timestamp, keep it around. + fn id_or_new_timestamp_of(&self, name: &Path, format: InputFormat) -> io::Result { + match self + .file_ids + .get(&SourcePath::Path(name.to_owned(), format)) + { + None => Ok(SourceState::Stale(timestamp(name)?)), + Some(NameIdEntry { + id, + source: SourceKind::Filesystem(ts), + }) => { + let new_timestamp = timestamp(name)?; + if ts == &new_timestamp { + Ok(SourceState::UpToDate(*id)) + } else { + Ok(SourceState::Stale(new_timestamp)) + } + } + Some(NameIdEntry { + id, + source: SourceKind::Memory, + }) => Ok(SourceState::UpToDate(*id)), + } + } + + /// Get a reference to the underlying files. Required by + /// the WASM REPL error reporting code and LSP functions. + pub fn files(&self) -> &Files { + &self.files + } + + /// Parse a Nickel source without querying nor populating the cache. + pub fn parse_nickel_nocache<'a, 'ast>( + &'a self, + // We take the allocator explicitly, to make sure `self.asts` is properly initialized + // before calling this function, and won't be dropped . + alloc: &'ast AstAlloc, + file_id: FileId, + ) -> Result<(Ast<'ast>, ParseErrors), ParseError> { + parse_nickel(alloc, file_id, self.files.source(file_id)) + } + + /// Parse a source that isn't Nickel without querying nor populating the cache. Support + /// multiple formats. + /// + /// The Nickel/non Nickel distinction is a bit artificial at the moment, due to the fact that + /// parsing Nickel returns the new [crate::bytecode::ast::Ast], while parsing other formats + /// don't go through the new AST first but directly deserialize to the legacy + /// [crate::term::Term] for simplicity and performance reasons. + pub fn parse_other_nocache( + &self, + file_id: FileId, + format: InputFormat, + ) -> Result<(RichTerm, ParseErrors), ParseError> { + let attach_pos = |t: RichTerm| -> RichTerm { + let pos: TermPos = self.files.source_span(file_id).into(); + t.with_pos(pos) + }; + + let source = self.files.source(file_id); + + match format { + InputFormat::Nickel => { + // Panicking isn't great, but we expect this to be temporary, until RFC007 is fully + // implemented. 
+ panic!("error: trying to parse a Nickel source with parse_other_nocache") + } + InputFormat::Json => serde_json::from_str(source) + .map(|t| (attach_pos(t), ParseErrors::default())) + .map_err(|err| ParseError::from_serde_json(err, file_id, &self.files)), + InputFormat::Yaml => { + // YAML files can contain multiple documents. If there is only + // one we transparently deserialize it. If there are multiple, + // we deserialize the file as an array. + let de = serde_yaml::Deserializer::from_str(source); + let mut terms = de + .map(|de| { + RichTerm::deserialize(de) + .map(attach_pos) + .map_err(|err| (ParseError::from_serde_yaml(err, file_id))) + }) + .collect::, _>>()?; + + if terms.is_empty() { + unreachable!( + "serde always produces at least one document, \ + the empty string turns into `null`" + ) + } else if terms.len() == 1 { + Ok(( + terms.pop().expect("we just checked the length"), + ParseErrors::default(), + )) + } else { + Ok(( + attach_pos( + Term::Array(terms.into_iter().collect(), Default::default()).into(), + ), + ParseErrors::default(), + )) + } + } + InputFormat::Toml => crate::serialize::toml_deser::from_str(source, file_id) + .map(|t| (attach_pos(t), ParseErrors::default())) + .map_err(|err| (ParseError::from_toml(err, file_id))), + #[cfg(feature = "nix-experimental")] + InputFormat::Nix => { + let json = nix_ffi::eval_to_json(source) + .map_err(|e| ParseError::from_nix(e.what(), file_id))?; + serde_json::from_str(&json) + .map(|t| (attach_pos(t), ParseErrors::default())) + .map_err(|err| ParseError::from_serde_json(err, file_id, &self.files)) + } + InputFormat::Text => Ok(( + attach_pos(Term::Str(source.into()).into()), + ParseErrors::default(), + )), + } + } + + /// Same as [Self::prepare], but do not use nor populate the cache. Used for inputs which are + /// known to not be reused. + /// + /// In this case, the caller has to process the imports themselves as needed: + /// - typechecking + /// - apply program transformations. + pub fn prepare_nocache<'ast>( + &mut self, + alloc: &'ast AstAlloc, + file_id: FileId, + initial_ctxt: &typecheck::Context<'ast>, + ) -> Result<(RichTerm, Vec), Error> { + let (ast, errs) = self.parse_nickel_nocache(alloc, file_id)?; + + if !errs.no_errors() { + return Err(Error::ParseErrors(errs)); + } + + let mut import_data = ImportData::new(); + let resolver = resolvers::AstResolver { + alloc: &alloc, + asts: &HashMap::new(), + new_asts: Vec::new(), + sources: self, + import_data: &mut import_data, + }; + + let wildcards = measure_runtime!( + "runtime:type_check", + typecheck( + alloc, + &ast, + initial_ctxt.clone(), + &resolver, + TypecheckMode::Walk + )? + ); + + let term = measure_runtime!("runtime:ast_conversion", ast.to_mainline()); + + let wildcards: Vec<_> = wildcards.iter().map(ToMainline::to_mainline).collect(); + + let term = transform::transform(term, Some(&wildcards)) + .map_err(|err| Error::ParseErrors(err.into()))?; + + Ok(( + term, + import_data + .imports + .get(&file_id) + .map(|ids| ids.iter().copied().collect()) + .unwrap_or_default(), + )) + } +} + +pub struct WildcardsCache { + /// The inferred type of wildcards for each `FileId`. + wildcards: HashMap, +} + +#[derive(Default, Clone)] +pub struct ImportData { + /// Map containing for each FileId a list of files they import (directly). + imports: HashMap>, + /// Map containing for each FileId a list of files importing them (directly). 
+ rev_imports: HashMap>, +} + +impl ImportData { + fn new() -> Self { + Self::default() + } + + /// Returns the set of files that this file imports. + pub fn get_imports(&self, file: FileId) -> impl Iterator + '_ { + self.imports + .get(&file) + .into_iter() + .flat_map(|s| s.iter()) + .copied() + } + + /// Returns the set of files that import this file. + pub fn get_rev_imports(&self, file: FileId) -> impl Iterator + '_ { + self.rev_imports + .get(&file) + .into_iter() + .flat_map(|s| s.iter()) + .copied() + } + + /// Returns the set of files that transitively depend on this file. + pub fn get_rev_imports_transitive(&self, file: FileId) -> HashSet { + let mut ret = HashSet::new(); + let mut stack = vec![file]; + + while let Some(file) = stack.pop() { + for f in self.get_rev_imports(file) { + if ret.insert(f) { + stack.push(f); + } + } + } + + ret + } +} + +pub struct Caches { + pub terms: TermCache, + pub sources: SourceCache, + pub asts: AstCache, + pub wildcards: WildcardsCache, + pub import_data: ImportData, + /// Whether processing should try to continue even in case of errors. Needed by the NLS. + error_tolerance: ErrorTolerance, + #[cfg(debug_assertions)] + /// Skip loading the stdlib, used for debugging purpose + pub skip_stdlib: bool, +} + +impl Caches { + /// Parse a source and populate the corresponding entry in the cache, or do + /// nothing if the entry has already been parsed. Support multiple formats. + /// This function is always error tolerant, independently from `self.error_tolerant`. + fn parse_tolerant( + &mut self, + file_id: FileId, + format: InputFormat, + ) -> Result, ParseError> { + if let Some(TermEntry { parse_errs, .. }) = self.terms.terms.get(&file_id) { + Ok(CacheOp::Cached(parse_errs.clone())) + } else { + if let InputFormat::Nickel = format { + let (ast, parse_errs) = self + .asts + .parse_nickel(file_id, self.sources.files.source(file_id))?; + + self.terms.terms.insert( + file_id, + TermEntry { + term: ast.to_mainline(), + state: EntryState::Parsed, + parse_errs: parse_errs.clone(), + }, + ); + + Ok(CacheOp::Done(parse_errs)) + } else { + let (term, parse_errs) = self.sources.parse_other_nocache(file_id, format)?; + + self.terms.terms.insert( + file_id, + TermEntry { + term, + state: EntryState::Parsed, + parse_errs: parse_errs.clone(), + }, + ); + + Ok(CacheOp::Done(parse_errs)) + } + } + } + + /// Parse a source and populate the corresponding entry in the cache, or do + /// nothing if the entry has already been parsed. Support multiple formats. + /// This function is error tolerant if `self.error_tolerant` is `true`. + /// + /// # RFC007 + /// + /// This method populates both the ast cache and the term cache at once. + pub fn parse( + &mut self, + file_id: FileId, + format: InputFormat, + ) -> Result, ParseErrors> { + let result = self.parse_tolerant(file_id, format); + + match self.error_tolerance { + ErrorTolerance::Tolerant => result.map_err(|err| err.into()), + ErrorTolerance::Strict => match result? { + CacheOp::Done(e) | CacheOp::Cached(e) if !e.no_errors() => Err(e), + CacheOp::Done(_) => Ok(CacheOp::Done(ParseErrors::none())), + CacheOp::Cached(_) => Ok(CacheOp::Cached(ParseErrors::none())), + }, + } + } + + /// Typecheck an entry of the cache and update its state accordingly, or do nothing if the + /// entry has already been typechecked. Require that the corresponding source has been parsed. + /// If the source contains imports, recursively typecheck on the imports too. 
+ /// + /// # RFC007 + /// + /// During the transition period between the old VM and the new bytecode VM, this method + /// performs typechecking on the new representation [crate::bytecode::ast::Ast], and is also + /// responsible for then converting the term to the legacy representation and populate the + /// corresponding term cache. + pub fn typecheck<'ast>( + &'ast mut self, + file_id: FileId, + initial_ctxt: &typecheck::Context<'ast>, + initial_mode: TypecheckMode, + ) -> Result, CacheError> { + self.asts.typecheck( + &mut self.sources, + &mut self.wildcards, + &mut self.terms, + &mut self.import_data, + file_id, + initial_ctxt, + initial_mode, + ) + } + + /// Prepare a source for evaluation: parse it, resolve the imports, typecheck it and apply + /// program transformations, if it was not already done. + pub fn prepare<'ast>( + &'ast mut self, + file_id: FileId, + initial_ctxt: &typecheck::Context<'ast>, + ) -> Result, Error> { + let mut result = CacheOp::Cached(()); + + let format = self + .sources + .file_paths + .get(&file_id) + .and_then(InputFormat::from_source_path) + .unwrap_or_default(); + + if let CacheOp::Done(_) = self.parse(file_id, format)? { + result = CacheOp::Done(()); + } + + let typecheck_res = self + .asts + .typecheck( + &mut self.sources, + &mut self.wildcards, + &mut self.terms, + &mut self.import_data, + file_id, + initial_ctxt, + TypecheckMode::Walk, + ) + .map_err(|cache_err| { + cache_err.unwrap_error( + "cache::prepare(): expected source to be parsed before typechecking", + ) + })?; + + if typecheck_res == CacheOp::Done(()) { + result = CacheOp::Done(()); + }; + + let transform_res = self + .terms + .transform(&self.wildcards, &self.import_data, file_id) + .map_err(|cache_err| { + Error::ParseErrors( + cache_err + .unwrap_error( + "cache::prepare(): expected source to be parsed before transformations", + ) + .into(), + ) + })?; + + if transform_res == CacheOp::Done(()) { + result = CacheOp::Done(()); + }; + + Ok(result) + } + + pub fn transform( + &mut self, + file_id: FileId, + ) -> Result, CacheError> { + self.terms + .transform(&self.wildcards, &self.import_data, file_id) + } + + /// Load and parse the standard library in the cache. + /// + /// # RFC007 + /// + /// This populates both the ast cache and the term cache at once. + pub fn load_stdlib(&mut self) -> Result, Error> { + let mut ret = CacheOp::Cached(()); + + for (_, file_id) in self.sources.files.stdlib_modules() { + let op = self.parse(file_id, InputFormat::Nickel)?; + ret = CacheOp::Done(()); + } + + Ok(ret) + } + + /// Typecheck the standard library. Currently only used in the test suite. + pub fn typecheck_stdlib(&mut self) -> Result, CacheError> { + // We have a small bootstraping problem: to typecheck the initial environment (that is the + // stdlib), we already need an initial term environment since stdlib parts may reference + // each other (for deciding type equality on contracts). But typechecking is performed + // before program transformations, so the term environment isn't the final initial + // evaluation environment. + // + // We have to create a temporary initial environment just for typechecking, which is + // dropped right after. However: + // 1. The stdlib is meant to stay relatively light. + // 2. Typechecking the standard library ought to occur only during development. We + // currently don't typecheck it for normal execution. 
+ let initial_env = self.mk_type_ctxt().map_err(|err| match err { + CacheError::NotParsed => CacheError::NotParsed, + CacheError::Error(_) => unreachable!(), + })?; + + self.typecheck_stdlib_in_ctxt(&initial_env) + } + + /// Typecheck the stdlib, provided the initial typing environment. Has to be public because + /// it's used in benches. It probably does not have to be used for something else. + pub fn typecheck_stdlib_in_ctxt<'ast>( + &'ast mut self, + initial_ctxt: &typecheck::Context<'ast>, + ) -> Result, CacheError> { + let mut ret = CacheOp::Cached(()); + + for (_, stdlib_module_id) in self.sources.files.stdlib_modules() { + if let CacheOp::Done(()) = self.asts.typecheck( + &mut self.sources, + &mut self.wildcards, + &mut self.terms, + &mut self.import_data, + stdlib_module_id, + initial_ctxt, + TypecheckMode::Walk, + )? { + ret = CacheOp::Done(()); + } } + + Ok(ret) } - /// Extracts format embedded in SourcePath - pub fn from_source_path(source_path: &SourcePath) -> Option { - if let SourcePath::Path(_p, fmt) = source_path { - Some(*fmt) - } else { - None + /// Load, parse, and apply program transformations to the standard library. Do not typecheck for + /// performance reasons: this is done in the test suite. Return an initial environment + /// containing both the evaluation and type environments. If you only need the type environment, + /// use `load_stdlib` then `mk_type_env` to avoid transformations and evaluation preparation. + pub fn prepare_stdlib(&mut self, eval_cache: &mut EC) -> Result { + #[cfg(debug_assertions)] + if self.skip_stdlib { + return Ok(Envs::new()); } + self.load_stdlib()?; + let type_ctxt = self.mk_type_ctxt().unwrap(); + + self.sources + .files + .stdlib_modules() + // We need to handle the internals module separately. Each field + // is bound directly in the environment without evaluating it first, so we can't + // tolerate top-level let bindings that would be introduced by `transform`. + .try_for_each(|(module, file_id)| { + if let nickel_stdlib::StdlibModule::Internals = module { + self.terms + .transform_inner(&self.wildcards, &self.import_data, file_id)?; + } else { + self.transform(file_id)?; + } + Ok(()) + }) + .map_err(|cache_err: CacheError| { + Error::ParseErrors( + cache_err + .unwrap_error( + "cache::prepare_stdlib(): expected standard library to be parsed", + ) + .into(), + ) + })?; + let eval_env = self.mk_eval_env(eval_cache).unwrap(); + Ok(Envs { + eval_env, + type_ctxt, + }) } } @@ -150,9 +1032,6 @@ pub struct Cache { import_paths: Vec, /// The map used to resolve package imports. package_map: Option, - #[cfg(debug_assertions)] - /// Skip loading the stdlib, used for debugging purpose - pub skip_stdlib: bool, } /// The error tolerance mode used by the parser. The NLS needs to try to @@ -209,7 +1088,7 @@ enum SourceKind { Memory, } -/// Cache keys for sources. +/// Cache entries for sources. /// /// A source can be either a snippet input by the user, in which case it is only identified by its /// name in the name-id table, and a unique `FileId`. On the other hand, different versions of the @@ -257,8 +1136,6 @@ pub enum EntryState { Closurized, } -pub enum EntryOrigin {} - /// The result of a cache operation, such as parsing, typechecking, etc. which can either have /// performed actual work, or have done nothing if the corresponding entry was already at a later /// stage. @@ -322,7 +1199,7 @@ pub enum SourcePath { Path(PathBuf, InputFormat), /// A subrange of a file at the given path. 
/// - /// This is used by nls to analyze small parts of files that don't fully parse. The + /// This is used by NLS to analyze small parts of files that don't fully parse. The /// original file path is preserved, because it's needed for resolving imports. Snippet(PathBuf), Std(StdlibModule), @@ -549,10 +1426,10 @@ impl Cache { Ok(CacheOp::Cached(parse_errs.clone())) } else { if let InputFormat::Nickel = format { - self.asts.insert_with_result(file_id, |alloc| { - let (ast, parse_errs) = self.parse_nickel_nocache(alloc, file_id)?; - Ok((ast, CacheOp::Done(parse_errs))) - }) + let (_, parse_errs) = self + .asts + .parse_nickel(file_id, self.files.source(file_id))?; + Ok(CacheOp::Done(parse_errs)) } else { let (term, parse_errs) = self.parse_other_nocache(file_id, format)?; @@ -598,16 +1475,7 @@ impl Cache { alloc: &'ast AstAlloc, file_id: FileId, ) -> Result<(Ast<'ast>, ParseErrors), ParseError> { - let (t, parse_errs) = measure_runtime!( - "runtime:parse:nickel", - parser::grammar::TermParser::new().parse_tolerant( - alloc, - file_id, - Lexer::new(self.files.source(file_id)) - )? - ); - - Ok((t, parse_errs)) + parse_nickel(alloc, file_id, self.files.source(file_id)) } /// Parse a source that isn't Nickel without querying nor populating the cache. Support @@ -688,67 +1556,18 @@ impl Cache { } } - /// Typecheck an entry of the cache and update its state accordingly, or do nothing if the - /// entry has already been typechecked. Require that the corresponding source has been parsed. - /// If the source contains imports, recursively typecheck on the imports too. - /// - /// # RFC007 - /// - /// During the transition period between the old VM and the new bytecode VM, this method - /// performs typechecking on the new representation [crate::bytecode::ast::Ast], and is also - /// responsible for then converting the term to the legacy representation and populate the - /// corresponding term cache. - pub fn typecheck( - &mut self, - file_id: FileId, - initial_ctxt: &typecheck::Context<'_>, - initial_mode: TypecheckMode, - ) -> Result, CacheError> { - // If the term cache is populated, given the current split of the pipeline between the old - // and the new AST, the term MUST have been typechecked. - if self.terms.get(&file_id).is_some() { - return Ok(CacheOp::Cached(())); - } - - let (Some(ast), Some(alloc)) = (self.asts.get(&file_id), self.asts.get_alloc()) else { - return Err(CacheError::NotParsed); - }; - - let wildcards = measure_runtime!( - "runtime:type_check", - typecheck(alloc, &ast, initial_ctxt.clone(), self, initial_mode)? - ); - - self.update_state(file_id, EntryState::Typechecking); - self.wildcards.insert(file_id, wildcards.iter().map(ToMainline::to_mainline).collect()); - - if let Some(imports) = self.imports.get(&file_id).cloned() { - for f in imports.into_iter() { - self.typecheck(f, initial_ctxt, initial_mode)?; - } - } - - self.update_state(file_id, EntryState::Typechecked); - - Ok(CacheOp::Done(())) - } - - /// Apply program transformations to an entry of the cache, and update its state accordingly, - /// or do nothing if the entry has already been transformed. Require that the corresponding - /// source has been parsed. - /// If the source contains imports, recursively perform transformations on the imports too. 
- pub fn transform( - &mut self, + fn transform_( + terms: &mut HashMap, + wildcards: &HashMap, file_id: FileId, ) -> Result, CacheError> { - match self.entry_state(file_id) { + match terms.get(&file_id).map(|entry| entry.state) { Some(state) if state >= EntryState::Transformed => Ok(CacheOp::Cached(())), Some(state) if state >= EntryState::Parsed => { if state < EntryState::Transforming { - let cached_term = self.terms.remove(&file_id).unwrap(); - let term = - transform::transform(cached_term.term, self.wildcards.get(&file_id))?; - self.terms.insert( + let cached_term = terms.remove(&file_id).unwrap(); + let term = transform::transform(cached_term.term, wildcards.get(&file_id))?; + terms.insert( file_id, TermEntry { term, @@ -757,12 +1576,13 @@ impl Cache { }, ); - if let Some(imports) = self.imports.get(&file_id).cloned() { - for f in imports.into_iter() { - self.transform(f)?; - } - } - self.update_state(file_id, EntryState::Transformed); + // if let Some(imports) = self.imports.get(&file_id).cloned() { + // for f in imports.into_iter() { + // self.transform(f)?; + // } + // } + + Self::update_state_(terms, file_id, EntryState::Transformed); } Ok(CacheOp::Done(())) } @@ -770,6 +1590,17 @@ impl Cache { } } + /// Apply program transformations to an entry of the cache, and update its state accordingly, + /// or do nothing if the entry has already been transformed. Require that the corresponding + /// source has been parsed. + /// If the source contains imports, recursively perform transformations on the imports too. + pub fn transform( + &mut self, + file_id: FileId, + ) -> Result, CacheError> { + Self::transform_(&mut self.terms, &self.wildcards, file_id) + } + /// Applies a custom transform to an input and its imports, leaving them /// in the same state as before. Requires that the input has been parsed. /// In order for the transform to apply to imports, they need to have been @@ -1045,105 +1876,6 @@ impl Cache { } } - /// Prepare a source for evaluation: parse it, resolve the imports, - /// typecheck it and apply program transformations, - /// if it was not already done. - pub fn prepare( - &mut self, - file_id: FileId, - initial_ctxt: &typecheck::Context<'_>, - ) -> Result, Error> { - let mut result = CacheOp::Cached(()); - - let format = self - .file_paths - .get(&file_id) - .and_then(InputFormat::from_source_path) - .unwrap_or_default(); - if let CacheOp::Done(_) = self.parse(file_id, format)? { - result = CacheOp::Done(()); - } - - let import_res = self.resolve_imports(file_id).map_err(|cache_err| { - cache_err.unwrap_error( - "cache::prepare(): expected source to be parsed before imports resolutions", - ) - })?; - if let CacheOp::Done(..) = import_res { - result = CacheOp::Done(()); - } - - let typecheck_res = self - .typecheck(file_id, initial_ctxt, TypecheckMode::Walk) - .map_err(|cache_err| { - cache_err.unwrap_error( - "cache::prepare(): expected source to be parsed before typechecking", - ) - })?; - if typecheck_res == CacheOp::Done(()) { - result = CacheOp::Done(()); - }; - - let transform_res = self.transform(file_id).map_err(|cache_err| { - Error::ParseErrors( - cache_err - .unwrap_error( - "cache::prepare(): expected source to be parsed before transformations", - ) - .into(), - ) - })?; - - if transform_res == CacheOp::Done(()) { - result = CacheOp::Done(()); - }; - - Ok(result) - } - - /// Same as [Self::prepare], but do not use nor populate the cache. Used for inputs which are - /// known to not be reused. 
- /// - /// In this case, the caller has to process the imports themselves as needed: - /// - typechecking - /// - resolve imports performed inside these imports. - /// - apply program transformations. - pub fn prepare_nocache<'ast>( - &mut self, - alloc: &'ast AstAlloc, - file_id: FileId, - initial_ctxt: &typecheck::Context<'ast>, - ) -> Result<(RichTerm, Vec), Error> { - let (ast, errs) = self.parse_nickel_nocache(alloc, file_id)?; - - if !errs.no_errors() { - return Err(Error::ParseErrors(errs)); - } - - let import_resolution::strict::ResolveResult { - transformed_term: term, - resolved_ids: pending, - } = import_resolution::strict::resolve_imports(ast.to_mainline(), self)?; - - let wildcards = measure_runtime!( - "runtime:type_check", - typecheck( - alloc, - &ast, - initial_ctxt.clone(), - self, - TypecheckMode::Walk - )? - ); - - let wildcards: Vec<_> = wildcards.iter().map(ToMainline::to_mainline).collect(); - - let term = transform::transform(term, Some(&wildcards)) - .map_err(|err| Error::ParseErrors(err.into()))?; - - Ok((term, pending)) - } - /// Retrieve the name of a source given an id. pub fn name(&self, file_id: FileId) -> &OsStr { self.files.name(file_id) @@ -1213,6 +1945,17 @@ impl Cache { .map(|TermEntry { state, .. }| std::mem::replace(state, new)) } + /// Update the state of an entry. Return the previous state. + pub fn update_state_( + terms: &mut HashMap, + file_id: FileId, + new: EntryState, + ) -> Option { + terms + .get_mut(&file_id) + .map(|TermEntry { ref mut state, .. }| std::mem::replace(state, new)) + } + /// Remove the cached term associated with this id, and any cached terms /// that import it. /// @@ -1262,56 +2005,22 @@ impl Cache { } /// Retrieve a reference to a cached term. - pub fn get_ref(&self, file_id: FileId) -> Option<&RichTerm> { - self.terms.get(&file_id).map(|TermEntry { term, .. }| term) - } - - /// Returns true if a particular file id represents a Nickel standard library file, false - /// otherwise. - pub fn is_stdlib_module(&self, file: FileId) -> bool { - self.files.is_stdlib(file) - } - - /// Retrieve the FileId for a given standard libray module. - pub fn get_submodule_file_id(&self, module: StdlibModule) -> Option { - self.files - .stdlib_modules() - .find(|(m, _id)| m == &module) - .map(|(_, id)| id) - } - - /// Returns the set of files that this file imports. - pub fn get_imports(&self, file: FileId) -> impl Iterator + '_ { - self.imports - .get(&file) - .into_iter() - .flat_map(|s| s.iter()) - .copied() - } - - /// Returns the set of files that import this file. - pub fn get_rev_imports(&self, file: FileId) -> impl Iterator + '_ { - self.rev_imports - .get(&file) - .into_iter() - .flat_map(|s| s.iter()) - .copied() - } - - /// Returns the set of files that transitively depend on this file. - pub fn get_rev_imports_transitive(&self, file: FileId) -> HashSet { - let mut ret = HashSet::new(); - let mut stack = vec![file]; + pub fn get_ref(&self, file_id: FileId) -> Option<&RichTerm> { + self.terms.get(&file_id).map(|TermEntry { term, .. }| term) + } - while let Some(file) = stack.pop() { - for f in self.get_rev_imports(file) { - if ret.insert(f) { - stack.push(f); - } - } - } + /// Returns true if a particular file id represents a Nickel standard library file, false + /// otherwise. + pub fn is_stdlib_module(&self, file: FileId) -> bool { + self.files.is_stdlib(file) + } - ret + /// Retrieve the FileId for a given standard libray module. 
+ pub fn get_submodule_file_id(&self, module: StdlibModule) -> Option { + self.files + .stdlib_modules() + .find(|(m, _id)| m == &module) + .map(|(_, id)| id) } /// Load and parse the standard library in the cache. @@ -1327,83 +2036,9 @@ impl Cache { Ok(ret) } - /// Typecheck the standard library. Currently only used in the test suite. - pub fn typecheck_stdlib(&mut self) -> Result, CacheError> { - // We have a small bootstraping problem: to typecheck the initial environment, we already - // need an initial evaluation environment, since stdlib parts may reference each other. But - // typechecking is performed before program transformations, so this environment is not - // the final one. We have create a temporary initial environment just for typechecking, - // which is dropped right after. However: - // 1. The stdlib is meant to stay relatively light. - // 2. Typechecking the standard library ought to occur only during development. We - // currently don't typecheck it for normal execution. - let initial_env = self.mk_type_ctxt().map_err(|err| match err { - CacheError::NotParsed => CacheError::NotParsed, - CacheError::Error(_) => unreachable!(), - })?; - self.typecheck_stdlib_(&initial_env) - } - - /// Typecheck the stdlib, provided the initial typing environment. Has to be public because - /// it's used in benches. It probably does not have to be used for something else. - pub fn typecheck_stdlib_( - &mut self, - initial_ctxt: &typecheck::Context<'_>, - ) -> Result, CacheError> { - self.files - .stdlib_modules() - .try_fold(CacheOp::Cached(()), |cache_op, (_, file_id)| { - match self.typecheck(file_id, initial_ctxt, TypecheckMode::Walk)? { - done @ CacheOp::Done(()) => Ok(done), - _ => Ok(cache_op), - } - }) - } - - /// Load, parse, and apply program transformations to the standard library. Do not typecheck for - /// performance reasons: this is done in the test suite. Return an initial environment - /// containing both the evaluation and type environments. If you only need the type environment, - /// use `load_stdlib` then `mk_type_env` to avoid transformations and evaluation preparation. - pub fn prepare_stdlib(&mut self, eval_cache: &mut EC) -> Result { - #[cfg(debug_assertions)] - if self.skip_stdlib { - return Ok(Envs::new()); - } - self.load_stdlib()?; - let type_ctxt = self.mk_type_ctxt().unwrap(); - - self.files - .stdlib_modules() - // We need to handle the internals module separately. Each field - // is bound directly in the environment without evaluating it first, so we can't - // tolerate top-level let bindings that would be introduced by `transform`. - .try_for_each(|(module, file_id)| { - if let nickel_stdlib::StdlibModule::Internals = module { - self.transform_inner(file_id)?; - } else { - self.transform(file_id)?; - } - Ok(()) - }) - .map_err(|cache_err: CacheError| { - Error::ParseErrors( - cache_err - .unwrap_error( - "cache::prepare_stdlib(): expected standard library to be parsed", - ) - .into(), - ) - })?; - let eval_env = self.mk_eval_env(eval_cache).unwrap(); - Ok(Envs { - eval_env, - type_ctxt, - }) - } - /// Generate the initial typing context from the list of `file_ids` corresponding to the /// standard library parts. 
- pub fn mk_type_ctxt(&self) -> Result, CacheError> { + pub fn mk_type_ctxt<'ast>(&'ast self) -> Result, CacheError> { let stdlib_terms_vec: Vec<(StdlibModule, Ast<'_>)> = self .files .stdlib_modules() @@ -1417,7 +2052,7 @@ impl Cache { ) }) .collect(); - Ok(typecheck::mk_initial_ctxt(self.alloc, &stdlib_terms_vec).unwrap()) + Ok(typecheck::mk_initial_ctxt(self.asts.get_alloc(), &stdlib_terms_vec).unwrap()) } /// Generate the initial evaluation environment from the list of `file_ids` corresponding to the @@ -1501,7 +2136,7 @@ pub trait ImportResolver { fn get_path(&self, file_id: FileId) -> Option<&OsStr>; } -impl<'ast> ImportResolver for Cache<'ast> { +impl ImportResolver for Cache { fn resolve( &mut self, import: &Import, @@ -1608,6 +2243,153 @@ impl<'ast> ImportResolver for Cache<'ast> { } } +impl ImportResolver for Caches { + fn resolve( + &mut self, + import: &Import, + parent: Option, + pos: &TermPos, + ) -> Result<(ResolvedTerm, FileId), ImportError> { + let (possible_parents, path, pkg_id, format) = match import { + Import::Path { path, format } => { + // `parent` is the file that did the import. We first look in its containing directory, followed by + // the directories in the import path. + let mut parent_path = parent + .and_then(|p| self.get_path(p)) + .map(PathBuf::from) + .unwrap_or_default(); + parent_path.pop(); + + ( + std::iter::once(parent_path) + .chain(self.sources.import_paths.iter().cloned()) + .collect(), + Path::new(path), + None, + *format, + ) + } + Import::Package { id } => { + let package_map = self + .sources + .package_map + .as_ref() + .ok_or(ImportError::NoPackageMap { pos: *pos })?; + let parent_path = parent + .and_then(|p| self.sources.packages.get(&p)) + .map(PathBuf::as_path); + let pkg_path = package_map.get(parent_path, *id, *pos)?; + ( + vec![pkg_path.to_owned()], + Path::new("main.ncl"), + Some(pkg_path.to_owned()), + // Packages are always in nickel format + InputFormat::Nickel, + ) + } + }; + + // Try to import from all possibilities, taking the first one that succeeds. + let (id_op, path_buf) = possible_parents + .iter() + .find_map(|parent| { + let mut path_buf = parent.clone(); + path_buf.push(path); + self.sources + .get_or_add_file(&path_buf, format) + .ok() + .map(|x| (x, path_buf)) + }) + .ok_or_else(|| { + let parents = possible_parents + .iter() + .map(|p| p.to_string_lossy()) + .collect::>(); + ImportError::IOError( + path.to_string_lossy().into_owned(), + format!("could not find import (looked in [{}])", parents.join(", ")), + *pos, + ) + })?; + + let (result, file_id) = match id_op { + CacheOp::Cached(id) => (ResolvedTerm::FromCache, id), + CacheOp::Done(id) => (ResolvedTerm::FromFile { path: path_buf }, id), + }; + + if let Some(parent) = parent { + self.import_data + .imports + .entry(parent) + .or_default() + .insert(file_id); + self.import_data + .rev_imports + .entry(file_id) + .or_default() + .insert(parent); + } + + self.parse(file_id, format) + .map_err(|err| ImportError::ParseErrors(err, *pos))?; + + if let Some(pkg_id) = pkg_id { + self.sources.packages.insert(file_id, pkg_id); + } + + Ok((result, file_id)) + } + + fn files(&self) -> &Files { + &self.sources.files + } + + fn get(&self, file_id: FileId) -> Option { + self.terms + .terms + .get(&file_id) + .map(|TermEntry { term, state, .. 
}| { + debug_assert!(*state >= EntryState::ImportsResolved); + term.clone() + }) + } + + fn get_path(&self, file_id: FileId) -> Option<&OsStr> { + self.sources + .file_paths + .get(&file_id) + .and_then(|p| p.try_into().ok()) + } +} + +pub trait AstImportResolver<'ast> { + /// Resolve an import. + /// + /// Read and store the content of an import, put it in the file cache (or get it from there if + /// it is cached), then parse it and return the corresponding term and file id. + /// + /// The term and the path are provided only if the import is processed for the first time. + /// Indeed, at import resolution phase, the term of an import encountered for the first time is + /// queued to be processed (e.g. having its own imports resolved). The path is needed to + /// resolve nested imports relatively to this parent. Only after this processing the term is + /// inserted back in the cache. On the other hand, if it has been resolved before, it is + /// already transformed in the cache and do not need further processing. + fn resolve( + &mut self, + import: &Import, + parent: Option, + pos: &TermPos, + ) -> Result<(ResolvedTerm, Ast<'ast>), ImportError>; + + // Return a reference to the file database. + // fn files(&self) -> &Files; + + // Get a resolved import from the term cache. + // fn get(&self, file_id: FileId) -> Option; + // Return the (potentially normalized) file path corresponding to the ID of a resolved import. + // fn get_path(&self, file_id: FileId) -> Option<&OsStr>; +} + /// Normalize the path of a file for unique identification in the cache. /// /// The returned path will be an absolute path. @@ -1704,6 +2486,42 @@ pub fn timestamp(path: impl AsRef) -> io::Result { pub mod resolvers { use super::*; + pub struct AstResolver<'ast, 'cache, 'input> { + /// The ast allocator used to parse new sources. + pub(super) alloc: &'ast AstAlloc, + /// The ast cache before the start of import resolution. Because of technicalities of the + /// self-referential [super::AstCache], we can only take it as an immutable reference. + /// Newly imported ASTs are put in [Self::new_asts]. + pub(super) asts: &'cache HashMap, ParseErrors)>, + /// Newly imported ASTs, to be appended to the AST cache after resolution. + pub(super) new_asts: Vec<(FileId, Ast<'ast>)>, + /// The source cache where new sources will be stored. + pub(super) sources: &'input mut SourceCache, + /// Direct and reverse dependencies of files (with respect to imports). + pub(super) import_data: &'cache mut ImportData, + } + + impl<'ast, 'cache, 'input> AstResolver<'ast, 'cache, 'input> { + pub(super) fn append_to_cache(self, asts: &mut HashMap, ParseErrors)>) { + asts.extend( + self.new_asts + .into_iter() + .map(|(id, ast)| (id, (ast, ParseErrors::default()))), + ); + } + } + + impl<'ast, 'cache, 'input> AstImportResolver<'ast> for AstResolver<'ast, 'cache, 'input> { + fn resolve( + &mut self, + import: &Import, + parent: Option, + pos: &TermPos, + ) -> Result<(ResolvedTerm, Ast<'ast>), ImportError> { + todo!() + } + } + /// A dummy resolver that panics when asked to do something. Used to test code that contains no /// import. pub struct DummyResolver {} @@ -1810,6 +2628,22 @@ pub mod resolvers { } } +// Parse a Nickel source. +fn parse_nickel<'input, 'ast>( + // We take the allocator explicitly, to make sure `self.asts` is properly initialized + // before calling this function, and won't be dropped . 
+ alloc: &'ast AstAlloc, + file_id: FileId, + source: &'input str, +) -> Result<(Ast<'ast>, ParseErrors), ParseError> { + let (t, parse_errs) = measure_runtime!( + "runtime:parse:nickel", + parser::grammar::TermParser::new().parse_tolerant(alloc, file_id, Lexer::new(source))? + ); + + Ok((t, parse_errs)) +} + /// Temporary AST cache (for the new [crate::bytecode::ast::Ast]) that holds the owned allocator of /// the AST nodes. /// @@ -1817,127 +2651,182 @@ pub mod resolvers { /// it's been put in its own module). Please do not mess with [ast_cache] unless you know what /// you're doing. mod ast_cache { - use super::{Ast, AstAlloc, FileId, HashMap}; + use super::*; #[derive(Debug)] - struct InnerAstCache { + pub struct AstCache { alloc: AstAlloc, /// **Caution**: the ASTs stored here are surely _not_ static, they are pointing to inside /// `alloc`. We just use `'static` as a place-holder. However, we can't currently express - /// such self-referential structure in safe Rust (well, the AST nodes are actually stored - /// in the heap by the allocator, so it's not strictly speaking self-referential, but the - /// lifetime of `Ast` is still tied to `self`) - asts: HashMap>, + /// such a self-referential structure in safe Rust, where the lifetime of `Ast` is tied to + /// `self`. + asts: HashMap, ParseErrors)>, } - impl InnerAstCache { - fn new() -> Self { - InnerAstCache { + impl AstCache { + pub fn new() -> Self { + AstCache { alloc: AstAlloc::new(), asts: HashMap::new(), } } + pub fn clear(&mut self) { + // We release the memory previously used by the allocator. Note that creating a new + // allocator doesn't require heap allocation, or at worst very few (we just allocate + // empty vectors and arenas, which usually have a capacity of 0 by default). + self.alloc = AstAlloc::new(); + self.asts.clear(); + } + /// Returns the underlying allocator, which might be required to call various helpers. - fn get_alloc(&self) -> &AstAlloc { + pub fn get_alloc(&self) -> &AstAlloc { &self.alloc } /// Retrieve the AST associated with a file id. - fn get<'ast>(&'ast self, file_id: &FileId) -> Option> { - self.asts.get(file_id).cloned() + pub fn get<'ast>(&'ast self, file_id: &FileId) -> Option> { + self.asts.get(file_id).map(|(ast, _errs)| ast).cloned() } - /// Takes a closure that builds an AST node from an allocator, a file ID, and populate the - /// corresponding entry in the cache with the AST. Returns the previously cached AST, if - /// any. - fn insert_with_alloc<'ast, F>(&'ast mut self, file_id: FileId, f: F) -> Option> - where - F: for<'a> FnOnce(&'ast AstAlloc) -> Ast<'ast>, - { - let ast = f(&self.alloc); + // /// Takes a closure that builds an AST node from an allocator, a file ID, and populate the + // /// corresponding entry in the cache with the AST. Returns the previously cached AST, if + // /// any. + // fn insert_with_alloc<'ast, F>(&'ast mut self, file_id: FileId, f: F) -> Option> + // where + // F: for<'a> FnOnce(&'ast AstAlloc) -> Ast<'ast>, + // { + // let ast = f(&self.alloc); + // // Safety: we are transmuting the lifetime of the AST from `'ast` to `'static`. This is + // // unsafe in general, but we never use or leak any `'static` reference. It's just a + // // placeholder. We only store such `Ast<'static>` in `asts`, and return them as `'a` + // // references where `self: 'a` in `get()`. + // // + // // Thus, the `'static` lifetime isn't observable from outsideof `AstCache`. 
+ // let promoted_ast = unsafe { std::mem::transmute::, Ast<'static>>(ast) }; + // self.asts.insert(file_id, promoted_ast) + // } + + // pub(super) fn insert_with_result<'ast, F, T, E>( + // &'ast mut self, + // file_id: FileId, + // f: F, + // ) -> Result + // where + // F: for<'a> FnOnce(&'ast AstAlloc) -> Result<(Ast<'ast>, T), E>, + // { + // let (ast, result) = f(&self.alloc)?; + // // Safety: we are transmuting the lifetime of the AST from `'ast` to `'static`. This is + // // unsafe in general, but we never use or leak any `'static` reference. It's just a + // // placeholder. We only store such `Ast<'static>` in `asts`, and return them as `'a` + // // references where `self: 'a` in `get()`. + // // + // // Thus, the `'static` lifetime isn't observable from outsideof `AstCache`. + // let promoted_ast = unsafe { std::mem::transmute::, Ast<'static>>(ast) }; + // let _ = self.asts.insert(file_id, promoted_ast); + // + // Ok(result) + // } + + pub fn parse_nickel<'ast>( + &'ast mut self, + file_id: FileId, + source: &str, + ) -> Result<(Ast<'ast>, ParseErrors), ParseError> { + let (ast, errs) = parse_nickel(&self.alloc, file_id, source)?; // Safety: we are transmuting the lifetime of the AST from `'ast` to `'static`. This is // unsafe in general, but we never use or leak any `'static` reference. It's just a // placeholder. We only store such `Ast<'static>` in `asts`, and return them as `'a` // references where `self: 'a` in `get()`. // // Thus, the `'static` lifetime isn't observable from outsideof `AstCache`. - let promoted_ast = unsafe { std::mem::transmute::, Ast<'static>>(ast) }; - self.asts.insert(file_id, promoted_ast) + let promoted_ast = unsafe { std::mem::transmute::, Ast<'static>>(ast.clone()) }; + self.asts + .insert(file_id, (promoted_ast.clone(), errs.clone())); + Ok((ast, errs)) } - pub(super) fn insert_with_result<'ast, F, T, E>(&'ast mut self, file_id: FileId, f: F) -> Result - where - F: for<'a> FnOnce(&'ast AstAlloc) -> Result<(Ast<'ast>, T), E>, - { - let (ast, result) = f(&self.alloc)?; - // Safety: we are transmuting the lifetime of the AST from `'ast` to `'static`. This is - // unsafe in general, but we never use or leak any `'static` reference. It's just a - // placeholder. We only store such `Ast<'static>` in `asts`, and return them as `'a` - // references where `self: 'a` in `get()`. - // - // Thus, the `'static` lifetime isn't observable from outsideof `AstCache`. - let promoted_ast = unsafe { std::mem::transmute::, Ast<'static>>(ast) }; - let _ = self.asts.insert(file_id, promoted_ast); - - Ok(result) + pub fn remove<'ast>(&'ast mut self, file_id: FileId) -> Option<(Ast<'ast>, ParseErrors)> { + self.asts.remove(&file_id) } - } - - /// A cache for [bytecode::ast::Ast] nodes. - /// - /// To make it possible to drop the AST nodes once typechecking has been performed, [AstCache] - /// is a wrapper around an optional [InnerAstCache]. Dropping the cache resets the option to - /// `None`. If one tries to insert into a dropped cache, the cache will automatically be - /// reinitialized, such that getting from and inserting into the cache are transparent - /// operations, whether the cache is actually live or not. - #[derive(Debug)] - pub(super) struct AstCache(Option); - impl AstCache { - pub(super) fn new() -> Self { - AstCache(Some(InnerAstCache::new())) - } + /// Typecheck an entry of the cache and update its state accordingly, or do nothing if the + /// entry has already been typechecked. Require that the corresponding source has been parsed. 
+ /// If the source contains imports, recursively typecheck on the imports too. + /// + /// # RFC007 + /// + /// During the transition period between the old VM and the new bytecode VM, this method + /// performs typechecking on the new representation [crate::bytecode::ast::Ast], and is also + /// responsible for then converting the term to the legacy representation and populate the + /// corresponding term cache. + pub fn typecheck<'ast, 'input>( + &'ast mut self, + sources: &'input mut SourceCache, + wildcards: &mut WildcardsCache, + terms: &mut TermCache, + import_data: &mut ImportData, + file_id: FileId, + initial_ctxt: &typecheck::Context<'ast>, + initial_mode: TypecheckMode, + ) -> Result, CacheError> { + // If the term cache is populated, given the current split of the pipeline between the old + // and the new AST, the term MUST have been typechecked. + if terms.terms.get(&file_id).is_some() { + return Ok(CacheOp::Cached(())); + } - /// Clear the cache and drop alls the allocated AST node. - pub(super) fn clear(&mut self) { - self.0 = None; - } + let Some((ast, _errs)) = self.asts.get(&file_id) else { + return Err(CacheError::NotParsed); + }; - pub(super) fn get_alloc(&self) -> Option<&AstAlloc> { - self.0.as_ref().map(InnerAstCache::get_alloc) - } + let resolver = resolvers::AstResolver { + alloc: &self.alloc, + asts: &self.asts, + new_asts: Vec::new(), + import_data, + sources, + }; - pub(super) fn get<'ast>(&'ast self, file_id: &FileId) -> Option> { - self.0.as_ref().and_then(|cache| cache.get(file_id)) - } + let wildcards_map = measure_runtime!( + "runtime:type_check", + typecheck( + &self.alloc, + &ast, + initial_ctxt.clone(), + &resolver, + initial_mode + )? + ); - pub(super) fn insert_with_alloc<'ast, F>( - &'ast mut self, - file_id: FileId, - f: F, - ) -> Option> - where - F: for<'a> FnOnce(&'ast AstAlloc) -> Ast<'ast>, - { - if self.0.is_none() { - self.0 = Some(InnerAstCache::new()); - } + self.asts + .extend(resolver.new_asts.into_iter().map(|(id, ast)| { + ( + id, + ( + // Safety: the implementation of AstResolver can only allocate new ASTs from + // `self.alloc` (or via leaked data), which thus are guaranteed to be live as long as + // `self`. As explained in the documentation of [Self], `'static` is just a non + // observable placeholder here. What counts is that the asts in the cache live as long + // as self. + unsafe { std::mem::transmute::, Ast<'static>>(ast) }, + ParseErrors::default(), + ), + ) + })); - // unwrap(): we just initialized the cache if it was `None` in the if above. - self.0.as_mut().unwrap().insert_with_alloc(file_id, f) - } + wildcards.wildcards.insert( + file_id, + wildcards_map.iter().map(ToMainline::to_mainline).collect(), + ); - pub(super) fn insert_with_result<'ast, F, T, E>(&'ast mut self, file_id: FileId, f: F) -> Result - where - F: for<'a> FnOnce(&'ast AstAlloc) -> Result<(Ast<'ast>, T), E>, - { - if self.0.is_none() { - self.0 = Some(InnerAstCache::new()); - } + // We can't use `update_state()` here because `self.asts.get_alloc()` must be live for the + // whole duration of the function (`'ast`) to match the provided typing context, which + // would conflict with borrowing `self` mutably. However, we can modify `terms` directly, + // as the compiler is able to see that we borrow a disjoint field. + terms.update_state(file_id, EntryState::Typechecked); - // unwrap(): we just initialized the cache if it was `None` in the if above. 
- self.0.as_mut().unwrap().insert_with_result(file_id, f) + Ok(CacheOp::Done(())) } } @@ -1948,7 +2837,7 @@ mod ast_cache { /// cache. impl Clone for AstCache { fn clone(&self) -> Self { - AstCache(None) + AstCache::new() } } } diff --git a/core/src/term/mod.rs b/core/src/term/mod.rs index abf7d0563..c0bfde99b 100644 --- a/core/src/term/mod.rs +++ b/core/src/term/mod.rs @@ -391,7 +391,7 @@ impl PartialEq for Term { } } -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq, Eq, Hash)] /// Specifies where something should be imported from. pub enum Import { Path { diff --git a/core/src/typecheck/mod.rs b/core/src/typecheck/mod.rs index a55a325f4..cf28c4f08 100644 --- a/core/src/typecheck/mod.rs +++ b/core/src/typecheck/mod.rs @@ -59,7 +59,7 @@ use crate::{ pattern::bindings::Bindings as _, record::FieldDef, typ::*, Annotation, Ast, AstAlloc, MatchBranch, Node, StringChunk, TryConvert, }, - cache::ImportResolver, + cache::AstImportResolver, environment::Environment, error::TypecheckError, identifier::{Ident, LocIdent}, @@ -1331,7 +1331,7 @@ pub fn env_add_term<'ast>( env: &mut TypeEnv<'ast>, ast: &Ast<'ast>, term_env: &TermEnv<'ast>, - resolver: &dyn ImportResolver, + resolver: &dyn AstImportResolver<'ast>, ) -> Result<(), EnvBuildError<'ast>> { match &ast.node { Node::Record(record) => { @@ -1359,7 +1359,7 @@ pub fn env_add<'ast>( id: LocIdent, ast: &Ast<'ast>, term_env: &TermEnv<'ast>, - resolver: &dyn ImportResolver, + resolver: &dyn AstImportResolver<'ast>, ) { env.insert( id.ident(), @@ -1380,7 +1380,7 @@ pub fn env_add<'ast>( /// refined/reborrowed during recursive calls. pub struct State<'ast, 'local> { /// The import resolver, to retrieve and typecheck imports. - resolver: &'local dyn ImportResolver, + resolver: &'local dyn AstImportResolver<'ast>, /// The unification table. table: &'local mut UnifTable<'ast>, /// Row constraints. 
@@ -1418,7 +1418,7 @@ pub fn typecheck<'ast>( alloc: &'ast AstAlloc, ast: &Ast<'ast>, initial_ctxt: Context<'ast>, - resolver: &impl ImportResolver, + resolver: &impl AstImportResolver<'ast>, initial_mode: TypecheckMode, ) -> Result, TypecheckError> { typecheck_visit(alloc, ast, initial_ctxt, resolver, &mut (), initial_mode) @@ -1430,7 +1430,7 @@ pub fn typecheck_visit<'ast, V>( ast_alloc: &'ast AstAlloc, ast: &Ast<'ast>, initial_ctxt: Context<'ast>, - resolver: &impl ImportResolver, + resolver: &impl AstImportResolver<'ast>, visitor: &mut V, initial_mode: TypecheckMode, ) -> Result, TypecheckError> @@ -2702,7 +2702,7 @@ pub trait HasApparentType<'ast> { &self, ast_alloc: &'ast AstAlloc, env: Option<&TypeEnv<'ast>>, - resolver: Option<&dyn ImportResolver>, + resolver: Option<&dyn AstImportResolver<'ast>>, ) -> ApparentType<'ast>; } @@ -2714,7 +2714,7 @@ impl<'ast> HasApparentType<'ast> for FieldDef<'ast> { &self, ast_alloc: &'ast AstAlloc, env: Option<&TypeEnv<'ast>>, - resolver: Option<&dyn ImportResolver>, + resolver: Option<&dyn AstImportResolver<'ast>>, ) -> ApparentType<'ast> { self.metadata .annotation @@ -2735,7 +2735,7 @@ impl<'ast> HasApparentType<'ast> for Node<'ast> { &self, ast_alloc: &'ast AstAlloc, env: Option<&TypeEnv<'ast>>, - resolver: Option<&dyn ImportResolver>, + resolver: Option<&dyn AstImportResolver<'ast>>, ) -> ApparentType<'ast> { use crate::files::FileId; @@ -2754,7 +2754,7 @@ impl<'ast> HasApparentType<'ast> for Node<'ast> { ast_alloc: &'ast AstAlloc, node: &Node<'ast>, env: Option<&TypeEnv<'ast>>, - resolver: Option<&dyn ImportResolver>, + resolver: Option<&dyn AstImportResolver<'ast>>, _imports_seen: HashSet, ) -> ApparentType<'ast> { match node { diff --git a/core/src/typecheck/record.rs b/core/src/typecheck/record.rs index 2539982d9..946327980 100644 --- a/core/src/typecheck/record.rs +++ b/core/src/typecheck/record.rs @@ -528,7 +528,7 @@ impl<'ast> HasApparentType<'ast> for ResolvedField<'ast> { &self, ast_alloc: &'ast AstAlloc, env: Option<&TypeEnv<'ast>>, - resolver: Option<&dyn ImportResolver>, + resolver: Option<&dyn AstImportResolver<'ast>>, ) -> ApparentType<'ast> { match self.defs.as_slice() { // If there is a resolved part, the apparent type is `Dyn`: a resolved part itself is a