Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add MAX_ROWS option for CSV rendering #30268

Merged
merged 12 commits into from
Jun 6, 2024
3 changes: 3 additions & 0 deletions custom/conf/app.example.ini
Original file line number Diff line number Diff line change
Expand Up @@ -1334,6 +1334,9 @@ LEVEL = Info
;;
;; Maximum allowed file size in bytes to render CSV files as table. (Set to 0 for no limit).
;MAX_FILE_SIZE = 524288
;;
;; Maximum allowed rows to render CSV files. (Set to 0 for no limit)
;MAX_ROWS = 2500

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
Expand Down
94 changes: 35 additions & 59 deletions modules/markup/csv/csv.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@ package markup

import (
"bufio"
"bytes"
"fmt"
"html"
"io"
"regexp"
Expand All @@ -15,6 +13,8 @@ import (
"code.gitea.io/gitea/modules/csv"
"code.gitea.io/gitea/modules/markup"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/translation"
"code.gitea.io/gitea/modules/util"
)

func init() {
Expand Down Expand Up @@ -81,86 +81,38 @@ func writeField(w io.Writer, element, class, field string) error {
func (r Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error {
tmpBlock := bufio.NewWriter(output)
maxSize := setting.UI.CSV.MaxFileSize
maxRows := setting.UI.CSV.MaxRows

if maxSize == 0 {
return r.tableRender(ctx, input, tmpBlock)
if maxSize != 0 {
input = io.LimitReader(input, maxSize+1)
}

rawBytes, err := io.ReadAll(io.LimitReader(input, maxSize+1))
if err != nil {
return err
}

if int64(len(rawBytes)) <= maxSize {
return r.tableRender(ctx, bytes.NewReader(rawBytes), tmpBlock)
}
return r.fallbackRender(io.MultiReader(bytes.NewReader(rawBytes), input), tmpBlock)
}

func (Renderer) fallbackRender(input io.Reader, tmpBlock *bufio.Writer) error {
_, err := tmpBlock.WriteString("<pre>")
if err != nil {
return err
}

scan := bufio.NewScanner(input)
scan.Split(bufio.ScanRunes)
for scan.Scan() {
switch scan.Text() {
case `&`:
_, err = tmpBlock.WriteString("&amp;")
case `'`:
_, err = tmpBlock.WriteString("&#39;") // "&#39;" is shorter than "&apos;" and apos was not in HTML until HTML5.
case `<`:
_, err = tmpBlock.WriteString("&lt;")
case `>`:
_, err = tmpBlock.WriteString("&gt;")
case `"`:
_, err = tmpBlock.WriteString("&#34;") // "&#34;" is shorter than "&quot;".
default:
_, err = tmpBlock.Write(scan.Bytes())
}
if err != nil {
return err
}
}
if err = scan.Err(); err != nil {
return fmt.Errorf("fallbackRender scan: %w", err)
}

_, err = tmpBlock.WriteString("</pre>")
if err != nil {
return err
}
return tmpBlock.Flush()
}

func (Renderer) tableRender(ctx *markup.RenderContext, input io.Reader, tmpBlock *bufio.Writer) error {
rd, err := csv.CreateReaderAndDetermineDelimiter(ctx, input)
if err != nil {
return err
}

if _, err := tmpBlock.WriteString(`<table class="data-table">`); err != nil {
return err
}
row := 1

row := 0
for {
fields, err := rd.Read()
if err == io.EOF {
if err == io.EOF || (row >= maxRows && maxRows != 0) {
break
}
if err != nil {
continue
}

if _, err := tmpBlock.WriteString("<tr>"); err != nil {
return err
}
element := "td"
if row == 1 {
if row == 0 {
element = "th"
}
if err := writeField(tmpBlock, element, "line-num", strconv.Itoa(row)); err != nil {
if err := writeField(tmpBlock, element, "line-num", strconv.Itoa(row+1)); err != nil {
return err
}
for _, field := range fields {
Expand All @@ -174,8 +126,32 @@ func (Renderer) tableRender(ctx *markup.RenderContext, input io.Reader, tmpBlock

row++
}

if _, err = tmpBlock.WriteString("</table>"); err != nil {
return err
}

// Check if maxRows or maxSize is reached, and if true, warn.
if (row >= maxRows && maxRows != 0) || (rd.InputOffset() >= maxSize && maxSize != 0) {
warn := `<table class="data-table"><tr><td>`
rawLink := ` <a href="` + ctx.Links.RawLink() + `/` + util.PathEscapeSegments(ctx.RelativePath) + `">`

// Try to get the user translation
if locale, ok := ctx.Ctx.Value(translation.ContextKey).(translation.Locale); ok {
warn += locale.TrString("repo.file_too_large")
rawLink += locale.TrString("repo.file_view_raw")
} else {
warn += "The file is too large to be shown."
rawLink += "View Raw"
}

warn += rawLink + `</a></td></tr></table>`

// Write the HTML string to the output
if _, err := tmpBlock.WriteString(warn); err != nil {
return err
}
}

return tmpBlock.Flush()
}
10 changes: 0 additions & 10 deletions modules/markup/csv/csv_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
package markup

import (
"bufio"
"bytes"
"strings"
"testing"

Expand All @@ -31,12 +29,4 @@ func TestRenderCSV(t *testing.T) {
assert.NoError(t, err)
assert.EqualValues(t, v, buf.String())
}

t.Run("fallbackRender", func(t *testing.T) {
var buf bytes.Buffer
err := render.fallbackRender(strings.NewReader("1,<a>\n2,<b>"), bufio.NewWriter(&buf))
assert.NoError(t, err)
want := "<pre>1,&lt;a&gt;\n2,&lt;b&gt;</pre>"
assert.Equal(t, want, buf.String())
})
}
3 changes: 3 additions & 0 deletions modules/setting/ui.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ var UI = struct {

CSV struct {
MaxFileSize int64
MaxRows int
} `ini:"ui.csv"`

Admin struct {
Expand Down Expand Up @@ -107,8 +108,10 @@ var UI = struct {
},
CSV: struct {
MaxFileSize int64
MaxRows int
}{
MaxFileSize: 524288,
MaxRows: 2500,
},
Admin: struct {
UserPagingNum int
Expand Down