diff --git a/Cargo.lock b/Cargo.lock index dd4ebe4ff..2a9770deb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -162,6 +162,7 @@ dependencies = [ "data-url", "deno_ast", "futures", + "monch", "once_cell", "parking_lot", "pretty_assertions", @@ -534,6 +535,12 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +[[package]] +name = "monch" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f13de1c3edc9a5b9dc3a1029f56e9ab3eba34640010aff4fc01044c42ef67afa" + [[package]] name = "new_debug_unreachable" version = "1.0.4" diff --git a/Cargo.toml b/Cargo.toml index 87978621c..4bd26d68d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,7 @@ anyhow = "1.0.43" data-url = "0.2.0" deno_ast = { version = "0.24.0", features = ["dep_graph", "module_specifier"] } futures = "0.3.17" +monch = "0.4.0" once_cell = "1.16.0" parking_lot = "0.12.0" regex = "1.5.4" diff --git a/src/analyzer.rs b/src/analyzer.rs index 12904f529..5ef66bae8 100644 --- a/src/analyzer.rs +++ b/src/analyzer.rs @@ -9,18 +9,11 @@ use deno_ast::ModuleSpecifier; use deno_ast::SourceRange; use deno_ast::SourceRangedForSpanned; use deno_ast::SourceTextInfo; -use once_cell::sync::Lazy; -use regex::Match; -use regex::Regex; use serde::Deserialize; use serde::Serialize; use crate::graph::Position; - -/// Matches the `@deno-types` pragma. -static DENO_TYPES_RE: Lazy = Lazy::new(|| { - Regex::new(r#"(?i)^\s*@deno-types\s*=\s*(?:["']([^"']+)["']|(\S+))"#).unwrap() -}); +use crate::pragma::parse_deno_types; /// A `@deno-types` pragma. pub struct DenoTypesPragma { @@ -32,47 +25,28 @@ pub struct DenoTypesPragma { pub fn analyze_deno_types( desc: &DependencyDescriptor, ) -> Option { - fn comment_position_to_position_range( - mut comment_start: Position, - m: &Match, - ) -> PositionRange { - // the comment text starts after the double slash or slash star, so add 2 - comment_start.character += 2; - PositionRange { - // This will always be on the same line. - // Does -1 and +1 to include the quotes + let comment = desc.leading_comments.last()?; + // @deno-types cannot be on a multi-line comment + if comment.range.start.line != comment.range.end.line { + return None; + } + + let deno_types = parse_deno_types(&comment.text).ok()?; + // the comment text starts after the double slash or slash star, so add 2 + let start_char = comment.range.start.character + 2; + Some(DenoTypesPragma { + specifier: deno_types.text.to_string(), + range: PositionRange { start: Position { - line: comment_start.line, - character: comment_start.character + m.start() - 1, + line: comment.range.start.line, + character: start_char + deno_types.quote_start, }, end: Position { - line: comment_start.line, - character: comment_start.character + m.end() + 1, + line: comment.range.start.line, + character: start_char + deno_types.quote_end, }, - } - } - - let comment = desc.leading_comments.last()?; - let captures = DENO_TYPES_RE.captures(&comment.text)?; - if let Some(m) = captures.get(1) { - Some(DenoTypesPragma { - specifier: m.as_str().to_string(), - range: comment_position_to_position_range( - comment.range.start.clone(), - &m, - ), - }) - } else if let Some(m) = captures.get(2) { - Some(DenoTypesPragma { - specifier: m.as_str().to_string(), - range: comment_position_to_position_range( - comment.range.start.clone(), - &m, - ), - }) - } else { - unreachable!("Unexpected captures from deno types regex") - } + }, + }) } #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] diff --git a/src/ast.rs b/src/ast.rs index 8b11e0cdc..93de16d11 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -9,6 +9,7 @@ use crate::analyzer::SpecifierWithRange; use crate::analyzer::TypeScriptReference; use crate::graph::Position; use crate::module_specifier::ModuleSpecifier; +use crate::pragma::parse_triple_slash_reference; use crate::DependencyKind; use crate::ImportAssertions; @@ -29,6 +30,8 @@ use std::cell::RefCell; use std::collections::HashMap; use std::sync::Arc; +// todo(dsherret): parse all these with monch in the future + /// Matches a JSDoc import type reference (`{import("./example.js")}` static JSDOC_IMPORT_RE: Lazy = Lazy::new(|| { Regex::new(r#"\{[^}]*import\(['"]([^'"]+)['"]\)[^}]*}"#).unwrap() @@ -36,17 +39,6 @@ static JSDOC_IMPORT_RE: Lazy = Lazy::new(|| { /// Matches the `@jsxImportSource` pragma. static JSX_IMPORT_SOURCE_RE: Lazy = Lazy::new(|| Regex::new(r#"(?i)^[\s*]*@jsxImportSource\s+(\S+)"#).unwrap()); -/// Matches a `/// ` comment reference. -static TRIPLE_SLASH_REFERENCE_RE: Lazy = - Lazy::new(|| Regex::new(r"(?i)^/\s*").unwrap()); -/// Matches a path reference, which adds a dependency to a module -static PATH_REFERENCE_RE: Lazy = - Lazy::new(|| Regex::new(r#"(?i)\spath\s*=\s*["']([^"']*)["']"#).unwrap()); -/// Matches a types reference, which for JavaScript files indicates the -/// location of types to use when type checking a program that includes it as -/// a dependency. -static TYPES_REFERENCE_RE: Lazy = - Lazy::new(|| Regex::new(r#"(?i)\stypes\s*=\s*["']([^"']*)["']"#).unwrap()); /// Parses modules to a ParsedSource. pub trait ModuleParser { @@ -332,30 +324,39 @@ fn analyze_ts_references( ) -> Vec { let mut references = Vec::new(); for comment in parsed_source.get_leading_comments().iter() { - if TRIPLE_SLASH_REFERENCE_RE.is_match(&comment.text) { - let comment_start = comment.start(); - if let Some(captures) = PATH_REFERENCE_RE.captures(&comment.text) { - let m = captures.get(1).unwrap(); + if comment.kind == CommentKind::Line { + if let Ok(path_ref) = parse_triple_slash_reference("path", &comment.text) + { + let comment_start = comment.range().start + 2; references.push(TypeScriptReference::Path(SpecifierWithRange { - text: m.as_str().to_string(), - range: comment_source_to_position_range( - comment_start, - &m, - parsed_source.text_info(), - false, - ), + text: path_ref.text.to_string(), + range: PositionRange { + start: Position::from_source_pos( + comment_start + path_ref.quote_start, + parsed_source.text_info(), + ), + end: Position::from_source_pos( + comment_start + path_ref.quote_end, + parsed_source.text_info(), + ), + }, })); - } else if let Some(captures) = TYPES_REFERENCE_RE.captures(&comment.text) + } else if let Ok(path_ref) = + parse_triple_slash_reference("types", &comment.text) { - let m = captures.get(1).unwrap(); + let comment_start = comment.range().start + 2; references.push(TypeScriptReference::Types(SpecifierWithRange { - text: m.as_str().to_string(), - range: comment_source_to_position_range( - comment_start, - &m, - parsed_source.text_info(), - false, - ), + text: path_ref.text.to_string(), + range: PositionRange { + start: Position::from_source_pos( + comment_start + path_ref.quote_start, + parsed_source.text_info(), + ), + end: Position::from_source_pos( + comment_start + path_ref.quote_end, + parsed_source.text_info(), + ), + }, })); } } diff --git a/src/graph.rs b/src/graph.rs index 513f73d1e..31d92f07b 100644 --- a/src/graph.rs +++ b/src/graph.rs @@ -606,26 +606,7 @@ pub struct Module { } impl Module { - fn new( - specifier: ModuleSpecifier, - kind: ModuleKind, - source: Arc, - ) -> Self { - Self { - dependencies: Default::default(), - kind, - maybe_cache_info: None, - maybe_source: Some(source), - maybe_types_dependency: None, - media_type: MediaType::Unknown, - specifier, - } - } - - pub fn new_without_source( - specifier: ModuleSpecifier, - kind: ModuleKind, - ) -> Self { + fn new_without_source(specifier: ModuleSpecifier, kind: ModuleKind) -> Self { Self { dependencies: Default::default(), kind, @@ -1414,8 +1395,15 @@ pub(crate) fn parse_module_from_module_info( maybe_resolver: Option<&dyn Resolver>, ) -> Module { // Init the module and determine its media type - let mut module = Module::new(specifier.clone(), kind, source); - module.media_type = media_type; + let mut module = Module { + dependencies: Default::default(), + kind, + maybe_cache_info: None, + maybe_source: Some(source), + maybe_types_dependency: None, + media_type, + specifier: specifier.clone(), + }; // Analyze the TypeScript triple-slash references for reference in module_info.ts_references { diff --git a/src/lib.rs b/src/lib.rs index 8c6696bd8..c4be59b8a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,7 @@ mod analyzer; mod ast; mod graph; mod module_specifier; +mod pragma; pub mod source; mod text_encoding; @@ -476,7 +477,7 @@ mod tests { Source::Module { specifier: "file:///a/test01.ts", maybe_headers: None, - content: r#"// @deno-types=./test02.d.ts + content: r#"// @deno-types="./test02.d.ts" import * as a from "./test02.js"; console.log(a); diff --git a/src/pragma.rs b/src/pragma.rs new file mode 100644 index 000000000..f9566de66 --- /dev/null +++ b/src/pragma.rs @@ -0,0 +1,62 @@ +// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. + +use monch::*; + +pub struct ParsedDenoTypes<'a> { + pub text: &'a str, + pub quote_start: usize, + pub quote_end: usize, +} + +pub fn parse_deno_types(input: &str) -> Result { + let original_input = input; + let (input, _) = skip_whitespace(input)?; + let (input, _) = tag("@deno-types")(input)?; + let (input, _) = ch('=')(input)?; + let quote_start_input = input; + let (input, quote_char) = or(ch('"'), ch('\"'))(input)?; + let (input, text) = take_while(|c| c != quote_char)(input)?; + let (input, _) = ch(quote_char)(input)?; + Ok(ParsedDenoTypes { + text, + quote_start: original_input.len() - quote_start_input.len(), + quote_end: original_input.len() - input.len(), + }) +} + +pub struct ParsedTripleSlashReference<'a> { + pub text: &'a str, + pub quote_start: usize, + pub quote_end: usize, +} + +/// Matches a `/// ` comment reference based on the kind (ex. path or types). +pub fn parse_triple_slash_reference<'a>( + kind: &str, + input: &'a str, +) -> Result, ParseError<'a>> { + // regex in TS codebase: /^(\/\/\/\s*/ + + let original_input = input; + let (input, _) = ch('/')(input)?; // only one, because we're starting from within a comment line + let (input, _) = skip_whitespace(input)?; + let (input, _) = tag("") { + return Err(monch::ParseError::Backtrace); + } + Ok(ParsedTripleSlashReference { + text, + quote_start: original_input.len() - quote_start_input.len(), + quote_end: original_input.len() - quote_end_input.len(), + }) +}