change highlight format, add cinematic tech. category
weihsiang-sony committed Aug 26, 2024
1 parent f46ea60 commit c10759d
Showing 2 changed files with 129 additions and 105 deletions.
232 changes: 128 additions & 104 deletions README.md
@@ -6,8 +6,9 @@
<div class="bk_container">
<div class="bk_item"><a href="#sec_dgm">I. Deep Generative Modeling</a></div>
<div class="bk_item"><a href="#sec_nlp">II. Multimodal<br/>NLP</a></div>
<div class="bk_item"><a href="#sec_mct">III. Music & Cinematic Techs.</a></div>
<div class="bk_item"><a href="#sec_challenges">IV. Hosted Challenges</a></div>
<div class="bk_item"><a href="#sec_mt">III. Music<br/>Technology</a></div>
<div class="bk_item"><a href="#sec_ct">IV. Cinematic Technology</a></div>
<div class="bk_item"><a href="#sec_challenges">V. Hosted Challenges</a></div>
</div>

<a name="sec_dgm"></a>
@@ -57,7 +58,6 @@
<a href="https://arxiv.org/abs/2405.14822">[arXiv]</a>
</h5>
<p>A 64x64 pre-trained diffusion model is all you need for 1-step high-resolution SOTA generation</p>
<div class="tile_highlight">Preprint</div>
</div>
<div class="tile">
<h3>HQ-VAE</h3>
@@ -87,7 +87,7 @@
<a href="https://github.com/sony/gibbsddrm">[code]</a>
</h5>
<p>Achieving blind inversion using DDPM</p>
<div class="tile_highlight">ICML23 Oral</div>
<div class="tile_highlight">ICML23</div>
<p>Applications:<br/>
<a href="https://arxiv.org/abs/2211.04124">[DeReverb]</a>
<a href="https://arxiv.org/abs/2210.17287">[SpeechEnhance]</a>
@@ -100,7 +100,7 @@
<a href="https://arxiv.org/abs/2306.00367">[arXiv]</a>
</h5>
<p>Theoretically unified framework for "consistency" on diffusion model</p>
<div class="tile_highlight">ICML23 SPIGM workshop</div>
<div class="tile_highlight">ICML23 SPIGM Workshop</div>
</div>
<div class="tile">
<h3>SQ-VAE</h3>
@@ -177,7 +177,8 @@
<a href="https://arxiv.org/abs/2305.02364">[arXiv]</a>
<a href="https://github.com/Silin159/PeaCoK">[code]</a>
</h5>
<p>PeaCoK: Persona Commonsense Knowledge for Consistent and Engaging Narratives<br>(ACL2023, Outstanding Paper Award)</p>
<p>PeaCoK: Persona Commonsense Knowledge for Consistent and Engaging Narratives<br>(Outstanding Paper Award)</p>
<div class="tile_highlight">ACL23</div>
</div>
<div class="tile">
<h3>ComFact</h3>
@@ -187,16 +188,16 @@
<a href="https://arxiv.org/abs/2210.12678">[arXiv]</a>
<a href="https://github.com/epfl-nlp/ComFact">[code]</a>
</h5>
<p>ComFact: A Benchmark for Linking Contextual Commonsense Knowledge<br>(EMNLP2022 Findings)</p>
<p>ComFact: A Benchmark for Linking Contextual Commonsense Knowledge</p>
<div class="tile_highlight">EMNLP22 Findings</div>
</div>
<div class="tile" style="background-color: white;"></div>
<div class="tile" style="background-color: white;"></div>

</div>


<a name="sec_mct"></a>
# III. Music & Cinematic Technologies
<a name="sec_mt"></a>
# III. Music Technologies

<br>

@@ -232,49 +233,6 @@
<p>MusicMagus: Zero-Shot Text-to-Music Editing via Diffusion Models</p>
<div class="tile_highlight">IJCAI24</div>
</div>
<div class="tile">
<h3>GenWarp</h3>
<img src="./assets/genwarp.png">
<h5>
<a href="https://arxiv.org/abs/2405.17251">[arXiv]</a>
<a href="https://genwarp-nvs.github.io/">[demo]</a>
</h5>
<p>GenWarp: Single Image to Novel Views with Semantic-Preserving Generative Warping</p>
</div>
<div class="tile">
<h3>Acoustic Inv. Rendering</h3>
<img src="./assets/hearing_anything_anywhere.png">
<h5>
<a href="https://openaccess.thecvf.com/content/CVPR2024/html/Wang_Hearing_Anything_Anywhere_CVPR_2024_paper.html">[CVF]</a>
<a href="https://arxiv.org/abs/2406.07532">[arXiv]</a>
<a href="https://zenodo.org/records/11195833">[dataset]</a>
<a href="https://github.com/maswang32/hearinganythinganywhere/">[code]</a>
<a href="https://masonlwang.com/hearinganythinganywhere/">[demo]</a>
</h5>
<p>Hearing Anything Anywhere</p>
<div class="tile_highlight">CVPR24</div>
</div>
<div class="tile">
<h3>STARSS23</h3>
<img src="./assets/STARSS23.png">
<h5>
<a href="https://arxiv.org/abs/2306.09126">[arXiv]</a>
<a href="https://zenodo.org/records/7880637">[dataset]</a>
</h5>
<p>STARSS23: An Audio-Visual Dataset of Spatial Recordings of Real Scenes with Spatiotemporal Annotations of Sound Events</p>
<div class="tile_highlight">NeurIPS23</div>
</div>
<div class="tile">
<h3>BigVSAN Vocoder</h3>
<img src="./assets/BigVSAN.png">
<h5>
<a href="https://arxiv.org/abs/2309.02836">[arXiv]</a>
<a href="https://github.com/sony/bigvsan">[code]</a>
<a href="https://takashishibuyasony.github.io/bigvsan/">[demo]</a>
</h5>
<p>BigVSAN: Enhancing GAN-based Neural Vocoders with Slicing Adversarial Network</p>
<div class="tile_highlight">ICASSP24</div>
</div>
<div class="tile">
<h3>Instr.-Agnostic Trans.</h3>
<img src="./assets/timbretrap.png">
@@ -296,16 +254,6 @@
<p>VRDMG: Vocal Restoration via Diffusion Posterior Sampling with Multiple Guidance</p>
<div class="tile_highlight">ICASSP24</div>
</div>
<div class="tile">
<h3>Zero-/Few-shot SELD</h3>
<img src="./assets/ZeroFewSELD.png">
<h5>
<a href="https://ieeexplore.ieee.org/document/10448497">[IEEE]</a>
<a href="https://arxiv.org/abs/2309.09223">[arXiv]</a>
</h5>
<p>Zero- and Few-shot Sound Event Localization and Detection</p>
<div class="tile_highlight">ICASSP24</div>
</div>
<div class="tile">
<h3>CLIPSep</h3>
<img src="./assets/CLIPSep.png">
@@ -315,7 +263,8 @@
<a href="https://github.com/sony/CLIPSep">[code]</a>
<a href="https://sony.github.io/CLIPSep/">[demo]</a>
</h5>
<p>CLIPSep: Learning Text-queried Sound Separation with Noisy Unlabeled Videos<br>(ICLR2023)</p>
<p>CLIPSep: Learning Text-queried Sound Separation with Noisy Unlabeled Videos</p>
<div class="tile_highlight">ICLR23</div>
</div>
<div class="tile">
<h3>hFT-Transformer</h3>
@@ -324,35 +273,17 @@
<a href="https://arxiv.org/abs/2307.04305">[arXiv]</a>
<a href="https://github.com/sony/hFT-Transformer">[code]</a>
</h5>
<p>Automatic Piano Transcription with Hierarchical Frequency-Time Transformer<br>(ISMIR2023)</p>
</div>
<div class="tile">
<h3>Audio Restoration: ViT-AE</h3>
<img src="./assets/vitae.png">
<h5>
<a href="https://ieeexplore.ieee.org/document/10248171">[IEEE]</a>
<a href="https://arxiv.org/abs/2305.06701">[arXiv]</a>
<a href="https://zzaudio.github.io/Demo_Extend_AudioMAE_toward_Restoration/demo_page.html">[demo]</a>
</h5>
<p>Extending Audio Masked Autoencoders Toward Audio Restoration<br>(WASPAA2023)</p>
</div>
<div class="tile">
<h3>Diffiner</h3>
<img src="./assets/Diffiner.png">
<h5>
<a href="https://www.isca-speech.org/archive/interspeech_2023/sawata23_interspeech.html">[ISCA]</a>
<a href="https://arxiv.org/abs/2210.17287">[arXiv]</a>
<a href="https://github.com/sony/diffiner">[code]</a>
</h5>
<p>Diffiner: A Versatile Diffusion-based Generative Refiner for Speech Enhancement<br>(INTERSPEECH2023)</p>
<p>Automatic Piano Transcription with Hierarchical Frequency-Time Transformer</p>
<div class="tile_highlight">ISMIR23</div>
</div>
<div class="tile">
<h3>Automatic Music Tagging</h3>
<img src="./assets/ResAtt.png">
<h5>
<a href="https://arxiv.org/abs/2302.08136">[arXiv]</a>
</h5>
<p>An Attention-based Approach To Hierarchical Multi-label Music Instrument Classification<br>(ICASSP2023)</p>
<p>An Attention-based Approach To Hierarchical Multi-label Music Instrument Classification</p>
<div class="tile_highlight">ICASSP23</div>
</div>
<div class="tile">
<h3>Vocal Dereverberation</h3>
@@ -361,7 +292,8 @@
<a href="https://arxiv.org/abs/2211.04124">[arXiv]</a>
<a href="https://koichi-saito-sony.github.io/unsupervised-vocal-dereverb/">[demo]</a>
</h5>
<p>Unsupervised Vocal Dereverberation with Diffusion-based Generative Models<br>(ICASSP2023)</p>
<p>Unsupervised Vocal Dereverberation with Diffusion-based Generative Models</p>
<div class="tile_highlight">ICASSP23</div>
</div>
<div class="tile">
<h3>Mixing Style Transfer</h3>
@@ -371,7 +303,8 @@
<a href="https://github.com/jhtonyKoo/music_mixing_style_transfer">[code]</a>
<a href="https://jhtonykoo.github.io/MixingStyleTransfer/">[demo]</a>
</h5>
<p>Music Mixing Style Transfer: A Contrastive Learning Approach to Disentangle Audio Effects<br>(ICASSP2023)</p>
<p>Music Mixing Style Transfer: A Contrastive Learning Approach to Disentangle Audio Effects</p>
<div class="tile_highlight">ICASSP23</div>
</div>
<div class="tile">
<h3>Music Transcription</h3>
@@ -381,7 +314,8 @@
<a href="https://github.com/sony/DiffRoll">[code]</a>
<a href="https://sony.github.io/DiffRoll/">[demo]</a>
</h5>
<p>DiffRoll: Diffusion-based Generative Music Transcription with Unsupervised Pretraining Capability<br>(ICASSP2023)</p>
<p>DiffRoll: Diffusion-based Generative Music Transcription with Unsupervised Pretraining Capability</p>
<div class="tile_highlight">ICASSP23</div>
</div>
<div class="tile">
<h3>Singing Voice Vocoder</h3>
@@ -390,7 +324,8 @@
<a href="https://arxiv.org/abs/2210.07508">[arXiv]</a>
<a href="https://t-naoya.github.io/hdm/">[demo]</a>
</h5>
<p>Hierarchical Diffusion Models for Singing Voice Neural Vocoder<br>(ICASSP2023)</p>
<p>Hierarchical Diffusion Models for Singing Voice Neural Vocoder</p>
<div class="tile_highlight">ICASSP23</div>
</div>
<div class="tile">
<h3>Distortion Effect Removal</h3>
@@ -400,7 +335,8 @@
<a href="https://arxiv.org/abs/2202.01664">[arXiv]</a>
<a href="https://joimort.github.io/distortionremoval/">[demo]</a>
</h5>
<p>Distortion Audio Effects: Learning How to Recover the Clean Signal<br>(ISMIR2022)</p>
<p>Distortion Audio Effects: Learning How to Recover the Clean Signal</p>
<div class="tile_highlight">ISMIR22</div>
</div>
<div class="tile">
<h3>Automatic Music Mixing</h3>
@@ -411,15 +347,17 @@
<a href="https://github.com/sony/fxnorm-automix">[code]</a>
<a href="https://marco-martinez-sony.github.io/FxNorm-automix/">[demo]</a>
</h5>
<p>Automatic Music Mixing with Deep Learning and Out-of-Domain Data<br>(ISMIR2022)</p>
<p>Automatic Music Mixing with Deep Learning and Out-of-Domain Data</p>
<div class="tile_highlight">ISMIR22</div>
</div>
<div class="tile">
<h3>Sound Separation</h3>
<a href="https://ieeexplore.ieee.org/document/9746317"><img src="./assets/srcsep.png"></a>
<h5>
<a href="https://ieeexplore.ieee.org/document/9746317">[IEEE]</a>
</h5>
<p>Music Source Separation with Deep Equilibrium Models<br>(ICASSP2022)</p>
<p>Music Source Separation with Deep Equilibrium Models</p>
<div class="tile_highlight">ICASSP22</div>
</div>
<div class="tile">
<h3>Automatic DJ Transition</h3>
@@ -429,16 +367,8 @@
<a href="https://github.com/ChenPaulYu/DJtransGAN">[code]</a>
<a href="https://paulyuchen.com/djtransgan-icassp2022/">[demo]</a>
</h5>
<p>Automatic DJ Transitions with Differentiable Audio Effects and Generative Adversarial Networks<br>(ICASSP2022)</p>
</div>
<div class="tile">
<h3>Sound Event Localization and Detection</h3>
<img src="./assets/ACCDOA.png">
<h5>
<a href="https://ieeexplore.ieee.org/document/9746384">[IEEE]</a>
<a href="https://arxiv.org/abs/2110.07124">[arXiv]</a>
</h5>
<p>Multi-ACCDOA: Localizing and Detecting Overlapping Sounds from the Same Class with Auxiliary Duplicating Permutation Invariant Training<br>(ICASSP2022)</p>
<p>Automatic DJ Transitions with Differentiable Audio Effects and Generative Adversarial Networks</p>
<div class="tile_highlight">ICASSP22</div>
</div>
<div class="tile">
<h3>Singing Voice Conversion</h3>
Expand All @@ -461,8 +391,102 @@
<div class="tile" style="background-color: white;"></div>
</div>

<a name="sec_ct"></a>
# IV. Cinematic Technologies

<br/>

<div class="trow">
<div class="tile">
<h3>GenWarp</h3>
<img src="./assets/genwarp.png">
<h5>
<a href="https://arxiv.org/abs/2405.17251">[arXiv]</a>
<a href="https://genwarp-nvs.github.io/">[demo]</a>
</h5>
<p>GenWarp: Single Image to Novel Views with Semantic-Preserving Generative Warping</p>
</div>
<div class="tile">
<h3>Acoustic Inv. Rendering</h3>
<img src="./assets/hearing_anything_anywhere.png">
<h5>
<a href="https://openaccess.thecvf.com/content/CVPR2024/html/Wang_Hearing_Anything_Anywhere_CVPR_2024_paper.html">[CVF]</a>
<a href="https://arxiv.org/abs/2406.07532">[arXiv]</a>
<a href="https://zenodo.org/records/11195833">[dataset]</a>
<a href="https://github.com/maswang32/hearinganythinganywhere/">[code]</a>
<a href="https://masonlwang.com/hearinganythinganywhere/">[demo]</a>
</h5>
<p>Hearing Anything Anywhere</p>
<div class="tile_highlight">CVPR24</div>
</div>
<div class="tile">
<h3>STARSS23</h3>
<img src="./assets/STARSS23.png">
<h5>
<a href="https://arxiv.org/abs/2306.09126">[arXiv]</a>
<a href="https://zenodo.org/records/7880637">[dataset]</a>
</h5>
<p>STARSS23: An Audio-Visual Dataset of Spatial Recordings of Real Scenes with Spatiotemporal Annotations of Sound Events</p>
<div class="tile_highlight">NeurIPS23</div>
</div>
<div class="tile">
<h3>BigVSAN Vocoder</h3>
<img src="./assets/BigVSAN.png">
<h5>
<a href="https://arxiv.org/abs/2309.02836">[arXiv]</a>
<a href="https://github.com/sony/bigvsan">[code]</a>
<a href="https://takashishibuyasony.github.io/bigvsan/">[demo]</a>
</h5>
<p>BigVSAN: Enhancing GAN-based Neural Vocoders with Slicing Adversarial Network</p>
<div class="tile_highlight">ICASSP24</div>
</div>
<div class="tile">
<h3>Zero-/Few-shot SELD</h3>
<img src="./assets/ZeroFewSELD.png">
<h5>
<a href="https://ieeexplore.ieee.org/document/10448497">[IEEE]</a>
<a href="https://arxiv.org/abs/2309.09223">[arXiv]</a>
</h5>
<p>Zero- and Few-shot Sound Event Localization and Detection</p>
<div class="tile_highlight">ICASSP24</div>
</div>
<div class="tile">
<h3>Audio Restoration: ViT-AE</h3>
<img src="./assets/vitae.png">
<h5>
<a href="https://ieeexplore.ieee.org/document/10248171">[IEEE]</a>
<a href="https://arxiv.org/abs/2305.06701">[arXiv]</a>
<a href="https://zzaudio.github.io/Demo_Extend_AudioMAE_toward_Restoration/demo_page.html">[demo]</a>
</h5>
<p>Extending Audio Masked Autoencoders Toward Audio Restoration</p>
<div class="tile_highlight">WASPAA23</div>
</div>
<div class="tile">
<h3>Diffiner</h3>
<img src="./assets/Diffiner.png">
<h5>
<a href="https://www.isca-speech.org/archive/interspeech_2023/sawata23_interspeech.html">[ISCA]</a>
<a href="https://arxiv.org/abs/2210.17287">[arXiv]</a>
<a href="https://github.com/sony/diffiner">[code]</a>
</h5>
<p>Diffiner: A Versatile Diffusion-based Generative Refiner for Speech Enhancement</p>
<div class="tile_highlight">INTERSPEECH23</div>
</div>
<div class="tile">
<h3>Sound Event Localization and Detection</h3>
<img src="./assets/ACCDOA.png">
<h5>
<a href="https://ieeexplore.ieee.org/document/9746384">[IEEE]</a>
<a href="https://arxiv.org/abs/2110.07124">[arXiv]</a>
</h5>
<p>Multi-ACCDOA: Localizing and Detecting Overlapping Sounds from the Same Class with Auxiliary Duplicating Permutation Invariant Training</p>
<div class="tile_highlight">ICASSP22</div>
</div>
<div class="tile" style="background-color: white;"></div>
</div>

<a name="sec_challenges"></a>
# IV. Hosted Challenges
# V. Hosted Challenges

<br/>

2 changes: 1 addition & 1 deletion local.css
@@ -17,7 +17,7 @@
border-image: linear-gradient(to bottom, #5ecc95, #159957) 1 100%;
border-width: 0 2px;
background-color: none;
width: 23%;
width: 18%;
height: 68px;

box-sizing: border-box;
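
The width change tracks the navigation bar growing from four category tiles to five: four tiles fit at 23% each (4 × 23% plus spacing ≈ 100%), while five require at most 20% each, so 18% preserves similar spacing. A minimal sketch of the updated rule, assuming the selector is `.bk_item` as the README markup suggests (the declarations are reproduced from the hunk above):

```css
/* Five category tiles now share the row, so each narrows
   from 23% (four tiles) to 18% (five tiles plus spacing). */
.bk_item {
  border-image: linear-gradient(to bottom, #5ecc95, #159957) 1 100%;
  border-width: 0 2px;
  background-color: none;
  width: 18%;   /* was 23% */
  height: 68px;
  box-sizing: border-box;
}
```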
