<!DOCTYPE html>
<HTML>
<HEAD>
<META http-equiv="X-UA-Compatible" content="IE=edge">
<META name="description" content="Zonghao Guo's home page">
<META http-equiv="Content-Type" content="text/html; charset=utf-8">
<LINK href="files/doc.css" rel="stylesheet" type="text/css">
<TITLE>Zonghao Guo</TITLE>
</HEAD>
<BODY>
<DIV id="layout-content" style="margin-top: 25px;">
<TABLE>
<TBODY>
<TR>
<TD width="670">
<DIV id="toptitle">
<H1>Zonghao Guo </H1></DIV>
<H3>Ph.D. candidate</H3>
<P>Room 330, Academy 2 Building
<BR>School of Electronic, Electrical and Communication Engineering
<BR>University of Chinese Academy of Sciences
<BR>Beijing, China, 101408.
<BR>
<BR> Email:
<A href="mailto:[email protected]"> [email protected]</A>;
<BR> Github:
<A href="https://github.com/guozonghao96">https://github.com/guozonghao96</A>;
<BR> Google scholar:
<A href="https://scholar.google.com.hk/citations?hl=&user=h1I6LJcAAAAJ&hl=en">https://scholar.google.com</A>
<BR><BR></P>
</TD>
<TD>
<IMG width="150" src="files/person_photo.jpg" border="0">
</TD>
</TR>
</TBODY>
</TABLE>
<DIV style="margin-top: 25px;">
<H2>Biography</H2>
<P> I am a Ph.D. candidate in <A href="http://lamp.ucas.ac.cn/">PRISDL</A> at the <A href="http://eece.ucas.ac.cn/index.php/en/">School of Electronic, Electrical and Communication Engineering</A>,
<A href="http://english.ucas.ac.cn/">University of Chinese Academy of Sciences</A>,
advised by <A href="http://people.ucas.ac.cn/~0007279?language=en">Prof. Qixiang Ye</A>.
I received my B.E. degree from Wuhan University in June 2019.
</P>
<P>My research interests include computer vision and deep learning, with a focus on object detection and instance segmentation.</P>
<H2>Publications</H2>
<table class="pub_table">
<!-- <tbody> -->
<tr>
<td class="pub_td1"><img src="files/PaperFig/LLaVA-UHD.jpg" class="papericon"></td>
<td
class="pub_td2"><u>Zonghao Guo</u>, Ruyi Xu, Yuan Yao, Junbo Cui, Zanlin Ni, Chunjiang Ge, Tat-Seng Chua, Zhiyuan Liu, Maosong Sun, Gao Huang
<br><b>LLaVA-UHD: an LMM Perceiving Any Aspect Ratio and High-Resolution Images</b>
<br>European Conference on Computer Vision, 2024
<br>
[<a href="https://arxiv.org/pdf/2403.11703">Paper</a>]
[<a href="https://github.com/thunlp/LLaVA-UHD">Code</a>]
<br>
</td>
</tr>
<tr>
<td class="pub_td1"><img src="files/PaperFig/control_cap.png" class="papericon"></td>
<td
class="pub_td2">Yuzhong Zhao, Yue Liu, <u>Zonghao Guo</u>, Weijia Wu, Chen Gong, Fang Wan, Qixiang Ye
<br><b>ControlCap: Controllable Region-level Captioning</b>
<br>European Conference on Computer Vision, 2024
<br>
[<a href="https://arxiv.org/pdf/2401.17910">Paper</a>]
[<a href="https://github.com/callsys/ControlCap">Code</a>]
<br>
</td>
</tr>
<tr>
<td class="pub_td1"><img src="files/PaperFig/cfa.jpg" class="papericon"></td>
<td
class="pub_td2"><u>Zonghao Guo</u>, Chang Liu, Xiaosong Zhang, Jianbin Jiao, Xiangyang Ji, Qixiang Ye
<br><b>Beyond Bounding-Box: Convex-hull Feature Adaptation for Oriented and Densely Packed Object Detection</b>
<br>IEEE/CVF Conference on Computer Vision and Pattern Recognition, 2021
<br>
[<a href="https://openaccess.thecvf.com/content/CVPR2021/papers/Guo_Beyond_Bounding-Box_Convex-Hull_Feature_Adaptation_for_Oriented_and_Densely_Packed_CVPR_2021_paper.pdf">Paper</a>]
[<a href="https://github.com/guozonghao96/BeyondBoundingBox">Code</a>]
<br>
</td>
</tr>
<tr>
<td class="pub_td1"><img src="files/PaperFig/Conformer_tpami.png" class="papericon"></td>
<td
class="pub_td2">Zhiliang Peng, <u>Zonghao Guo</u>, Wei Huang, Yaowei Wang, Lingxi Xie, Jianbin Jiao, Qixiang Ye
<br><b>Conformer: Local features coupling global representations for visual recognition and detection</b>
<br>IEEE Transactions on Pattern Analysis and Machine Intelligence, 2023
<br>
[<a href="https://doi.org/10.1109/TPAMI.2023.3243048">Paper</a>]
[<a href="https://github.com/pengzhiliang/Conformer">Code</a>]
<br>
</td>
</tr>
<tr>
<td class="pub_td1"><img src="files/PaperFig/attn_shift.jpg" class="papericon"></td>
<td
class="pub_td2">Mingxiang Liao*, <u>Zonghao Guo*</u>, Yuze Wang, Peng Yuan, Bailan Feng, Fang Wan
<br><b>AttentionShift: Iteratively Estimated Part-based Attention Map for Pointly Supervised Instance Segmentation</b>
<br>IEEE/CVF Conference on Computer Vision and Pattern Recognition, 2023
</td>
</tr>
<tr>
<td class="pub_td1"><img src="files/PaperFig/imTED.png" class="papericon"></td>
<td
class="pub_td2">Feng Liu, Xiaosong Zhang, Zhiliang Peng, <u>Zonghao Guo</u>, Fang Wan, Xiangyang Ji, Qixiang Ye
<br><b>Integral Migrating Pre-trained Transformer Encoder-decoders for Visual Object Detection</b>
<br>IEEE International Conference on Computer Vision, 2023
<br>
[<a href="https://arxiv.org/abs/2205.09613">arXiv preprint</a>]
[<a href="https://github.com/LiewFeng/imTED">Code</a>]
</td>
</tr>
<tr>
<td class="pub_td1"><img src="files/PaperFig/cfa_tcsvt.jpg" class="papericon"></td>
<td
class="pub_td2"><u>Zonghao Guo</u>, Xiaosong Zhang, Chang Liu, Xiangyang Ji, Jianbin Jiao, Qixiang Ye
<br><b>Convex-hull Feature Adaptation for Oriented and Densely Packed Object Detection</b>
<br>IEEE Transactions on Circuits and Systems for Video Technology, 2022
<br>
[<a href="https://ieeexplore.ieee.org/document/9668956">Paper</a>]
[<a href="https://github.com/guozonghao96/BeyondBoundingBox">Code</a>]
<br>
</td>
</tr>
<tr>
<td class="pub_td1"><img src="files/PaperFig/bfg.jpg" class="papericon"></td>
<td
class="pub_td2">Yongqiang Mao*, <u>Zonghao Guo*</u>, Xiaonan Lu, Zhiqiang Yuan, Haowen Guo
<br><b>Bidirectional Feature Globalization for Few-shot Semantic Segmentation of 3D Point Cloud Scenes</b>
<br>International Conference on 3D Vision (Oral), 2022
<br>
[<a href="https://arxiv.org/abs/2208.06671">Paper</a>]
<br>
</td>
</tr>
<tr>
<td class="pub_td1"><img src="files/PaperFig/LDA.png" class="papericon"></td>
<td
class="pub_td2">Zhiliang Peng, Wei Huang, <u>Zonghao Guo</u>, Xiaosong Zhang, Jianbin Jiao, Qixiang Ye
<br><b>Long-tailed Distribution Adaptation</b>
<br>29th ACM International Conference on Multimedia, 2021
<br>
[<a href="https://arxiv.org/abs/2110.02686">Paper</a>]
[<a href="https://github.com/pengzhiliang/LDA">Code</a>]
<br>
</td>
</tr>
<tr>
<td class="pub_td1"><img src="files/PaperFig/dgfa.jpg" class="papericon"></td>
<td
class="pub_td2">Yongqiang Mao, Xian Sun, Kaiqiang Chen, Wenhui Diao, <u>Zonghao Guo</u>, Xiaonan Lu, Kun Fu
<br><b>Semantic Segmentation for Point Cloud Scenes via Dilated Graph Feature Aggregation and Pyramid Decoders</b>
<br>AAAI Conference on Artificial Intelligence Workshop, 2021
<br>
[<a href="https://arxiv.org/abs/2204.04944">Paper</a>]
<br>
</td>
</tr>
<!-- </tbody> -->
</table>
<!-- <br>
<H2>Awards</H2>
<LI> Excellent Student Scholarship, Chinese Academy of Sciences, 2020. </LI> -->
<br> <br>
<H2>Statistics</H2>
<script type="text/javascript" src="//rf.revolvermaps.com/0/0/8.js?i=5063gq35g0n&m=0&c=ff0000&cr1=ffffff&f=arial&l=33" async="async"></script>
</DIV>
</DIV>
</BODY>
</HTML>