feat(oxc_codegen): support generate range leading comments (#4898)

1. Support print range leading comments for specific expr or stmt
oxc-project · Aug 15, 2024 · d49fb16 · d49fb16
1 parent 9c700ed
commit d49fb16
Show file tree

Hide file tree

Showing 7 changed files with 78 additions and 109 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/crates/oxc_codegen/Cargo.toml b/crates/oxc_codegen/Cargo.toml
@@ -32,7 +32,6 @@ bitflags   = { workspace = true }
 nonmax     = { workspace = true }
 once_cell  = { workspace = true }
 daachorse  = { workspace = true }
-rustc-hash = { workspace = true }
 
 [dev-dependencies]
 oxc_parser = { workspace = true }

diff --git a/crates/oxc_codegen/src/annotation_comment.rs b/crates/oxc_codegen/src/annotation_comment.rs
@@ -46,37 +46,29 @@ impl From<(Comment, AnnotationKind)> for AnnotationComment {
 }
 
 impl<'a, const MINIFY: bool> Codegen<'a, MINIFY> {
-    pub(crate) fn get_leading_annotate_comment(
+    pub(crate) fn get_leading_annotate_comments(
         &mut self,
         node_start: u32,
-    ) -> Option<AnnotationComment> {
-        let maybe_leading_comment = self.try_get_leading_comment(node_start);
-        let comment = maybe_leading_comment?;
-        if self.latest_consumed_comment_end >= comment.span.end {
-            return None;
-        }
-        let real_end = match comment.kind {
-            CommentKind::SingleLine => comment.span.end,
-            CommentKind::MultiLine => comment.span.end + 2,
-        };
-        let source_code = self.source_text;
-        let content_between = &source_code[real_end as usize..node_start as usize];
-        // Used for VariableDeclaration (Rollup only respects "const" and only for the first one)
-        if content_between.chars().all(|ch| ch.is_ascii_whitespace()) {
-            let comment_content =
-                &source_code[comment.span.start as usize..comment.span.end as usize];
-            if let Some(m) = MATCHER.find_iter(&comment_content).next() {
-                let annotation_kind = match m.value() {
-                    0 | 1 => AnnotationKind::NO_SIDE_EFFECTS,
-                    2 | 3 => AnnotationKind::PURE,
-                    _ => unreachable!(),
-                };
-                return Some((*comment, annotation_kind).into());
-            }
-            None
-        } else {
-            None
+    ) -> Vec<AnnotationComment> {
+        if !self.comment_options.preserve_annotate_comments {
+            return vec![];
         }
+        self.get_leading_comments(self.latest_consumed_comment_end, node_start)
+            .filter_map(|comment| {
+                let source_code = self.source_text;
+                let comment_content =
+                    &source_code[comment.span.start as usize..comment.span.end as usize];
+                if let Some(m) = MATCHER.find_iter(&comment_content).next() {
+                    let annotation_kind = match m.value() {
+                        0 | 1 => AnnotationKind::NO_SIDE_EFFECTS,
+                        2 | 3 => AnnotationKind::PURE,
+                        _ => unreachable!(),
+                    };
+                    return Some((*comment, annotation_kind).into());
+                }
+                None
+            })
+            .collect::<Vec<_>>()
     }
 
     pub(crate) fn print_comment(&mut self, comment: AnnotationComment) {
@@ -93,18 +85,19 @@ impl<'a, const MINIFY: bool> Codegen<'a, MINIFY> {
         // in this example, `Object.getOwnPropertyNames(Symbol)` and `Object.getOwnPropertyNames(Symbol).filter()`, `Object.getOwnPropertyNames(Symbol).filter().map()`
         // share the same leading comment. since they both are call expr and has same span start, we need to avoid print the same comment multiple times.
         let comment_span = comment.span();
+
         if self.latest_consumed_comment_end >= comment_span.end {
             return;
         }
-        self.latest_consumed_comment_end = comment_span.end;
-        match comment.kind() {
+        let real_comment_end = match comment.kind() {
             CommentKind::SingleLine => {
                 self.print_str("//");
                 self.print_range_of_source_code(
                     comment_span.start as usize..comment_span.end as usize,
                 );
                 self.print_soft_newline();
                 self.print_indent();
+                comment_span.end
             }
             CommentKind::MultiLine => {
                 self.print_str("/*");
@@ -113,8 +106,10 @@ impl<'a, const MINIFY: bool> Codegen<'a, MINIFY> {
                 );
                 self.print_str("*/");
                 self.print_soft_space();
+                comment_span.end + 2
             }
-        }
+        };
+        self.update_last_consumed_comment_end(real_comment_end);
         // FIXME: esbuild function `restoreExprStartFlags`
         self.start_of_default_export = self.code_len();
     }
@@ -123,21 +118,30 @@ impl<'a, const MINIFY: bool> Codegen<'a, MINIFY> {
         if !self.comment_options.preserve_annotate_comments {
             return;
         }
-        let mut annotation_kind_set = AnnotationKind::empty();
-        if let Some(comment) = self.try_take_moved_comment(node_start) {
-            let kind = comment.annotation_kind();
-            if !annotation_kind_set.intersects(kind) {
-                annotation_kind_set.insert(kind);
-                self.print_comment(comment);
-            }
-        }
-        let maybe_leading_annotate_comment = self.get_leading_annotate_comment(node_start);
-        if let Some(comment) = maybe_leading_annotate_comment {
+        let annotation_kind_set = AnnotationKind::empty();
+
+        let leading_annotate_comments = self.get_leading_annotate_comments(node_start);
+        self.print_comments(&leading_annotate_comments, annotation_kind_set);
+    }
+
+    #[inline]
+    pub(crate) fn print_comments(
+        &mut self,
+        leading_annotate_comment: &Vec<AnnotationComment>,
+        mut annotation_kind_set: AnnotationKind,
+    ) {
+        for &comment in leading_annotate_comment {
             let kind = comment.annotation_kind();
             if !annotation_kind_set.intersects(kind) {
                 annotation_kind_set.insert(kind);
                 self.print_comment(comment);
             }
+            self.update_last_consumed_comment_end(comment.span().end);
         }
     }
+
+    #[inline]
+    pub fn update_last_consumed_comment_end(&mut self, end: u32) {
+        self.latest_consumed_comment_end = self.latest_consumed_comment_end.max(end);
+    }
 }
diff --git a/crates/oxc_codegen/src/gen.rs b/crates/oxc_codegen/src/gen.rs
@@ -12,6 +12,7 @@ use oxc_syntax::{
 };
 
 use crate::{
+    annotation_comment::AnnotationKind,
     binary_expr_visitor::{BinaryExpressionVisitor, Binaryish, BinaryishOperator},
     Codegen, Context, Operator,
 };
@@ -154,6 +155,10 @@ impl<'a, const MINIFY: bool> Gen<MINIFY> for Statement<'a> {
                 p.print_semicolon_after_statement();
             }
         }
+
+        if p.comment_options.preserve_annotate_comments {
+            p.update_last_consumed_comment_end(self.span().end);
+        }
     }
 }
 
@@ -591,17 +596,9 @@ impl<'a, const MINIFY: bool> Gen<MINIFY> for VariableDeclaration<'a> {
         }
 
         if p.comment_options.preserve_annotate_comments
-            && matches!(self.kind, VariableDeclarationKind::Const)
+            && !matches!(self.kind, VariableDeclarationKind::Const)
         {
-            if let Some(declarator) = self.declarations.first() {
-                if let Some(ref init) = declarator.init {
-                    if let Some(leading_annotate_comment) =
-                        p.get_leading_annotate_comment(self.span.start)
-                    {
-                        p.move_comment(init.span().start, leading_annotate_comment);
-                    }
-                }
-            }
+            p.update_last_consumed_comment_end(self.span.start);
         }
         p.print_str(match self.kind {
             VariableDeclarationKind::Const => "const",
@@ -864,23 +861,16 @@ impl<'a, const MINIFY: bool> Gen<MINIFY> for ExportNamedDeclaration<'a> {
     fn gen(&self, p: &mut Codegen<{ MINIFY }>, ctx: Context) {
         p.add_source_mapping(self.span.start);
         p.print_indent();
+
         if p.comment_options.preserve_annotate_comments {
             match &self.declaration {
                 Some(Declaration::FunctionDeclaration(_)) => {
                     p.gen_comments(self.span.start);
                 }
                 Some(Declaration::VariableDeclaration(var_decl))
-                    if matches!(var_decl.kind, VariableDeclarationKind::Const) =>
+                    if !matches!(var_decl.kind, VariableDeclarationKind::Const) =>
                 {
-                    if let Some(declarator) = var_decl.declarations.first() {
-                        if let Some(ref init) = declarator.init {
-                            if let Some(leading_annotate_comment) =
-                                p.get_leading_annotate_comment(self.span.start)
-                            {
-                                p.move_comment(init.span().start, leading_annotate_comment);
-                            }
-                        }
-                    }
+                    p.update_last_consumed_comment_end(self.span.start);
                 }
                 _ => {}
             };
@@ -1076,6 +1066,9 @@ impl<'a, const MINIFY: bool> GenExpr<MINIFY> for Expression<'a> {
             Self::TSNonNullExpression(e) => e.gen_expr(p, precedence, ctx),
             Self::TSInstantiationExpression(e) => e.gen_expr(p, precedence, ctx),
         }
+        if p.comment_options.preserve_annotate_comments {
+            p.update_last_consumed_comment_end(self.span().end);
+        }
     }
 }
 
@@ -1415,14 +1408,12 @@ impl<'a, const MINIFY: bool> GenExpr<MINIFY> for PrivateFieldExpression<'a> {
 impl<'a, const MINIFY: bool> GenExpr<MINIFY> for CallExpression<'a> {
     fn gen_expr(&self, p: &mut Codegen<{ MINIFY }>, precedence: Precedence, ctx: Context) {
         let mut wrap = precedence >= Precedence::New || ctx.intersects(Context::FORBID_CALL);
-        let annotate_comment = p.get_leading_annotate_comment(self.span.start);
-        if annotate_comment.is_some() && precedence >= Precedence::Postfix {
+        let annotate_comments = p.get_leading_annotate_comments(self.span.start);
+        if !annotate_comments.is_empty() && precedence >= Precedence::Postfix {
             wrap = true;
         }
         p.wrap(wrap, |p| {
-            if let Some(comment) = annotate_comment {
-                p.print_comment(comment);
-            }
+            p.print_comments(&annotate_comments, AnnotationKind::empty());
             p.add_source_mapping(self.span.start);
             self.callee.gen_expr(p, Precedence::Postfix, Context::empty());
             if self.optional {
@@ -2086,14 +2077,12 @@ impl<'a, const MINIFY: bool> GenExpr<MINIFY> for ChainExpression<'a> {
 impl<'a, const MINIFY: bool> GenExpr<MINIFY> for NewExpression<'a> {
     fn gen_expr(&self, p: &mut Codegen<{ MINIFY }>, precedence: Precedence, ctx: Context) {
         let mut wrap = precedence >= self.precedence();
-        let annotate_comment = p.get_leading_annotate_comment(self.span.start);
-        if annotate_comment.is_some() && precedence >= Precedence::Postfix {
+        let annotate_comment = p.get_leading_annotate_comments(self.span.start);
+        if !annotate_comment.is_empty() && precedence >= Precedence::Postfix {
             wrap = true;
         }
         p.wrap(wrap, |p| {
-            if let Some(comment) = annotate_comment {
-                p.print_comment(comment);
-            }
+            p.print_comments(&annotate_comment, AnnotationKind::empty());
             p.print_space_before_identifier();
             p.add_source_mapping(self.span.start);
             p.print_str("new ");

diff --git a/crates/oxc_codegen/src/lib.rs b/crates/oxc_codegen/src/lib.rs
@@ -12,8 +12,6 @@ mod sourcemap_builder;
 
 use std::{borrow::Cow, ops::Range};
 
-use rustc_hash::FxHashMap;
-
 use oxc_ast::{
     ast::{BindingIdentifier, BlockStatement, Expression, IdentifierReference, Program, Statement},
     Comment, Trivias,
@@ -35,8 +33,6 @@ pub use crate::{
     gen::{Gen, GenExpr},
 };
 
-use self::annotation_comment::AnnotationComment;
-
 /// Code generator without whitespace removal.
 pub type CodeGenerator<'a> = Codegen<'a, false>;
 
@@ -98,10 +94,6 @@ pub struct Codegen<'a, const MINIFY: bool> {
     // Builders
     sourcemap_builder: Option<SourcemapBuilder>,
 
-    /// The key of map is the node start position,
-    /// the first element of value is the start of the comment
-    /// the second element of value includes the end of the comment and comment kind.
-    move_comment_map: MoveCommentMap,
     latest_consumed_comment_end: u32,
 }
 
@@ -147,7 +139,6 @@ impl<'a, const MINIFY: bool> Codegen<'a, MINIFY> {
             indent: 0,
             quote: b'"',
             sourcemap_builder: None,
-            move_comment_map: MoveCommentMap::default(),
             latest_consumed_comment_end: 0,
         }
     }
@@ -521,8 +512,6 @@ impl<'a, const MINIFY: bool> Codegen<'a, MINIFY> {
     }
 }
 
-pub(crate) type MoveCommentMap = FxHashMap<u32, AnnotationComment>;
-
 // Comment related
 impl<'a, const MINIFY: bool> Codegen<'a, MINIFY> {
     /// Avoid issue related to rustc borrow checker .
@@ -532,28 +521,7 @@ impl<'a, const MINIFY: bool> Codegen<'a, MINIFY> {
         self.code.extend_from_slice(self.source_text[range].as_bytes());
     }
 
-    /// In some scenario, we want to move the comment that should be codegened to another position.
-    /// ```js
-    ///  /* @__NO_SIDE_EFFECTS__ */ export const a = function() {
-    ///
-    ///  }, b = 10000;
-    ///
-    /// ```
-    /// should generate such output:
-    /// ```js
-    ///   export const /* @__NO_SIDE_EFFECTS__ */ a = function() {
-    ///
-    ///  }, b = 10000;
-    /// ```
-    fn move_comment(&mut self, position: u32, full_comment_info: AnnotationComment) {
-        self.move_comment_map.insert(position, full_comment_info);
-    }
-
-    fn try_get_leading_comment(&self, start: u32) -> Option<&Comment> {
-        self.trivias.comments_range(0..start).next_back()
-    }
-
-    fn try_take_moved_comment(&mut self, node_start: u32) -> Option<AnnotationComment> {
-        self.move_comment_map.remove(&node_start)
+    fn get_leading_comments(&self, start: u32, end: u32) -> impl Iterator<Item = &'_ Comment> + '_ {
+        self.trivias.comments_range(start..end)
     }
 }
diff --git a/crates/oxc_codegen/tests/integration/pure_comments.rs b/crates/oxc_codegen/tests/integration/pure_comments.rs
@@ -198,4 +198,14 @@ const defineSSRCustomElement = /* @__NO_SIDE_EFFECTS__ */ (
 ",
         "const defineSSRCustomElement = /* #__NO_SIDE_EFFECTS__ */ (options, extraOptions) => {\n\treturn /* @__PURE__ */ defineCustomElement(options, extraOptions, hydrate);\n};\n",
     );
+
+    // Range leading comments
+    test(
+        r"
+const defineSSRCustomElement = () => {
+	return /* @__PURE__ */ /* @__NO_SIDE_EFFECTS__ */ /* #__NO_SIDE_EFFECTS__ */ defineCustomElement(options, extraOptions, hydrate);
+};
+",
+        "const defineSSRCustomElement = () => {\n\treturn /* @__PURE__ */ /* @__NO_SIDE_EFFECTS__ */ defineCustomElement(options, extraOptions, hydrate);\n};\n",
+    );
 }
diff --git a/crates/oxc_transformer/README.md b/crates/oxc_transformer/README.md
@@ -85,7 +85,7 @@ variable names) which causes Babel's tests to fail when run on Oxc's output.
 In future we may find a way to work around this problem.
 
 So where we feel Babel's implementation is inefficient, but we have to follow it at present to pass their
-tests, make a `// TODO(improve-on-babel): Babel's impl is inefficiant because X, we could do better by Y`
+tests, make a `// TODO(improve-on-babel): Babel's impl is inefficient because X, we could do better by Y`
 comment, so we can return to it later.
 
 ### Clear "entry points" 
@@ -139,7 +139,7 @@ Only exception is that parent can check if child transform is enabled or not.
 
 #### Bad! Don't do this.
 
-Here some of logic from child tranform is "leaked" into the parent:
+Here some of logic from child transform is "leaked" into the parent:
 
 ```rs
 // src/do_stuff/mod.rs