// Source: gitness/gitrpc/diff/diff.go (507 lines, 11 KiB, Go)

// Copyright 2023 Harness, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package diff
import (
"bufio"
"bytes"
"errors"
"fmt"
"io"
"strconv"
"strings"
"github.com/harness/gitness/gitrpc/enum"
)
// Escaped and literal forms of the backslash and tab characters, allocated
// once at package initialization rather than on every use.
var (
	escapedSlash = []byte("\\\\") // two bytes: backslash, backslash
	regularSlash = []byte("\\")   // one byte: backslash
	escapedTab   = []byte("\\t")  // two bytes: backslash, 't'
	regularTab   = []byte("\t")   // one byte: horizontal tab
)
// LineType classifies a single line inside a diff section.
type LineType uint8

// The known line types. Numbering starts at 1, so the zero value of LineType
// is not one of them.
const (
	DiffLinePlain   LineType = 1 // line without an add or delete marker
	DiffLineAdd     LineType = 2 // added line
	DiffLineDelete  LineType = 3 // deleted line
	DiffLineSection LineType = 4 // section header line
)
// FileType is the status of a file in a diff.
type FileType uint8

// The known file statuses. FileAdd is the zero value of FileType.
const (
	FileAdd    FileType = 0
	FileChange FileType = 1
	FileDelete FileType = 2
	FileRename FileType = 3
)
// Line is one line of a diff section.
type Line struct {
	// Type is the kind of the line.
	Type LineType
	// Content is the text of the line.
	Content string
	// LeftLine and RightLine are the line numbers on the left and on the
	// right side of the diff.
	LeftLine, RightLine int
}
// Section is one section of a file diff together with its line statistics.
type Section struct {
	// Lines holds the lines of the section.
	Lines []*Line

	// Counters for lines of type DiffLineAdd and DiffLineDelete.
	numAdditions, numDeletions int
}

// NumLines returns the number of lines in the section.
func (s *Section) NumLines() int { return len(s.Lines) }

// Line looks up a line of the section by type and line number.
//
// Only DiffLineAdd and DiffLineDelete can produce a match; for any other
// lineType the result is nil. The section is scanned in order:
//
//   - every line that is neither an addition nor a deletion records the
//     offset RightLine-LeftLine and starts a new run of changes;
//   - a deletion is requested with line-offset as its left number, an
//     addition with line+offset as its right number;
//   - the scan stops at the first non-change line after a match.
//
// The match is returned only if the run of changes scanned last contains as
// many additions as deletions; otherwise nil is returned.
func (s *Section) Line(lineType LineType, line int) *Line {
	var (
		offset  int
		added   int
		removed int
		found   *Line
	)

scan:
	for _, cur := range s.Lines {
		switch cur.Type {
		case DiffLineAdd:
			added++
		case DiffLineDelete:
			removed++
		default:
			// The run of changes containing the match is over.
			if found != nil {
				break scan
			}
			offset = cur.RightLine - cur.LeftLine
			added, removed = 0, 0
		}

		// The comparison looks at line numbers only, not at cur.Type.
		if lineType == DiffLineDelete {
			if cur.RightLine == 0 && cur.LeftLine == line-offset {
				found = cur
			}
		} else if lineType == DiffLineAdd {
			if cur.LeftLine == 0 && cur.RightLine == line+offset {
				found = cur
			}
		}
	}

	if added != removed {
		return nil
	}
	return found
}
// File describes the diff of a single file.
type File struct {
	// Path is the name and path of the file.
	Path string
	// OldPath is the old name and path of the file.
	OldPath string
	// Type is the kind of change the file went through.
	Type FileType
	// SHA is the index (SHA1 hash) of the file. For a changed/new file, it is
	// the new SHA, and for a deleted file it becomes "000000".
	SHA string
	// OldSHA is the old index (SHA1 hash) of the file.
	OldSHA string
	// Sections holds the sections of the file's diff.
	Sections []*Section

	numAdditions, numDeletions int
	mode, oldMode              enum.EntryMode

	IsBinary, IsSubmodule bool
}

// Status returns the file's change type as a lower-case word: "added",
// "deleted", "renamed" or "changed". Any other Type value yields "unchanged".
func (f *File) Status() string {
	switch f.Type {
	case FileAdd:
		return "added"
	case FileDelete:
		return "deleted"
	case FileRename:
		return "renamed"
	case FileChange:
		return "changed"
	default:
		return "unchanged"
	}
}

// NumSections returns the number of sections in the file.
func (f *File) NumSections() int { return len(f.Sections) }

// NumAdditions returns the number of additions in the file.
func (f *File) NumAdditions() int { return f.numAdditions }

// NumChanges returns the sum of additions and deletions in the file.
func (f *File) NumChanges() int { return f.NumAdditions() + f.NumDeletions() }

// NumDeletions returns the number of deletions in the file.
func (f *File) NumDeletions() int { return f.numDeletions }

// Mode returns the mode of the file.
func (f *File) Mode() enum.EntryMode { return f.mode }

// OldMode returns the old mode of the file if it's changed.
func (f *File) OldMode() enum.EntryMode { return f.oldMode }

// IsEmpty reports whether neither Path nor OldPath is set.
func (f *File) IsEmpty() bool {
	if f.Path != "" {
		return false
	}
	return f.OldPath == ""
}
// Parser reads a git diff line by line from the embedded reader and reports
// the files it finds through Parse.
type Parser struct {
*bufio.Reader
// buffer holds the next line that has not been processed yet, without its
// trailing line break. The parsing methods inspect it to decide how to
// continue and set it to nil once the line has been consumed; readLine only
// reads a new line while it is nil.
buffer []byte
// isEOF is set by readLine when the reader reports io.EOF.
isEOF bool
}
// readLine makes sure p.buffer holds the next unprocessed line.
//
// If a line is already buffered it is kept and nothing is read. Otherwise one
// line is read from the embedded reader and stored without its trailing '\n'.
// Reaching the end of the input is not an error: it sets p.isEOF and leaves
// whatever was read before the end (possibly nothing) in the buffer. Any other
// read error is returned, wrapped so that callers can still inspect the cause
// with errors.Is / errors.As.
func (p *Parser) readLine() error {
	if p.buffer != nil {
		return nil
	}

	var err error
	p.buffer, err = p.ReadBytes('\n')
	if err != nil {
		if !errors.Is(err, io.EOF) {
			return fmt.Errorf("read string: %w", err)
		}
		p.isEOF = true
	}

	// Remove line break
	if n := len(p.buffer); n > 0 && p.buffer[n-1] == '\n' {
		p.buffer = p.buffer[:n-1]
	}
	return nil
}
// diffHead is the prefix of the line that starts the diff of a file,
// e.g. diff --git a/xxx b/xxx.
var diffHead = []byte("diff --git ")
// parseFileHeader parses the "diff --git" line held in p.buffer and the
// extended header lines that follow it, and returns the described file.
//
// Lines are consumed until one of the following is seen:
//   - an index line (its SHAs and optional mode are recorded),
//   - a similarity line ending in "100%" (pure rename, no index follows),
//   - the header of the next file, which is left in p.buffer for the caller
//     (this is what follows a file without an index line, such as a
//     mode-only change),
//   - the end of the input.
//
// A header or index line that cannot be split into its parts yields an error.
func (p *Parser) parseFileHeader() (*File, error) {
	// Suffix of a "new file mode" / "deleted file mode" line for a submodule.
	const submoduleMode = " 160000"

	oldPath, newPath, err := splitHeaderPaths(string(p.buffer))
	p.buffer = nil
	if err != nil {
		return nil, err
	}

	// The a/ side of the header is the old path, the b/ side the new one.
	file := &File{
		Path:    newPath,
		OldPath: oldPath,
		Type:    FileChange,
	}

	// Check file diff type and submodule
checkType:
	for !p.isEOF {
		if err = p.readLine(); err != nil {
			return nil, err
		}

		// The next file starts here: keep its header buffered for the caller.
		if bytes.HasPrefix(p.buffer, diffHead) {
			break checkType
		}

		line := string(p.buffer)
		p.buffer = nil

		if len(line) == 0 {
			continue
		}

		switch {
		case strings.HasPrefix(line, enum.DiffExtHeaderNewFileMode):
			file.Type = FileAdd
			file.IsSubmodule = strings.HasSuffix(line, submoduleMode)
			if mode, ok := lastFieldOctal(line); ok {
				file.mode = enum.EntryMode(mode)
				if file.oldMode == 0 {
					file.oldMode = file.mode
				}
			}

		case strings.HasPrefix(line, enum.DiffExtHeaderDeletedFileMode):
			file.Type = FileDelete
			file.IsSubmodule = strings.HasSuffix(line, submoduleMode)
			if mode, ok := lastFieldOctal(line); ok {
				file.mode = enum.EntryMode(mode)
				if file.oldMode == 0 {
					file.oldMode = file.mode
				}
			}

		case strings.HasPrefix(line, enum.DiffExtHeaderIndex): // e.g. index ee791be..9997571 100644
			fields := strings.Fields(strings.TrimPrefix(line, enum.DiffExtHeaderIndex))
			var shas []string
			if len(fields) > 0 {
				shas = strings.Split(fields[0], "..")
			}
			if len(shas) != 2 {
				return nil, errors.New("malformed index: expect two SHAs in the form of <old>..<new>")
			}

			file.OldSHA = shas[0]
			file.SHA = shas[1]
			if len(fields) > 1 {
				// Best effort: the value ParseUint returns for an invalid
				// mode is stored as is.
				mode, _ := strconv.ParseUint(fields[1], 8, 64)
				file.mode = enum.EntryMode(mode)
				file.oldMode = enum.EntryMode(mode)
			}
			break checkType

		case strings.HasPrefix(line, enum.DiffExtHeaderSimilarity):
			file.Type = FileRename

			// No need to look for index if it's a pure rename
			if strings.HasSuffix(line, "100%") {
				break checkType
			}

		case strings.HasPrefix(line, enum.DiffExtHeaderNewMode):
			if mode, ok := lastFieldOctal(line); ok {
				file.mode = enum.EntryMode(mode)
			}

		case strings.HasPrefix(line, enum.DiffExtHeaderOldMode):
			if mode, ok := lastFieldOctal(line); ok {
				file.oldMode = enum.EntryMode(mode)
			}
		}
	}

	return file, nil
}

// splitHeaderPaths extracts the a/ and b/ paths, without those prefixes, from
// a "diff --git" header line. It returns an error when the line does not have
// the expected shape.
func splitHeaderPaths(line string) (oldPath, newPath string, err error) {
	beg := len(diffHead)
	if len(line) <= beg {
		return "", "", fmt.Errorf("malformed diff header %q", line)
	}

	// NOTE: In case file name is surrounded by double quotes (it happens only
	// in git-shell). e.g. diff --git "a/xxx" "b/xxx"
	quoted := line[beg] == '"'
	sep := ` b/`
	if quoted {
		sep = ` "b/`
	}

	middle := strings.Index(line, sep)
	if middle < beg+2 {
		return "", "", fmt.Errorf("malformed diff header %q", line)
	}

	// Both slices start right after the one-letter side marker ("a" or "b").
	oldPath = line[beg+2 : middle]
	newPath = line[middle+3:]
	if !quoted {
		return oldPath, newPath, nil
	}

	// Quoted form: each slice is "/" + path + closing quote.
	if len(oldPath) < 2 || len(newPath) < 2 {
		return "", "", fmt.Errorf("malformed diff header %q", line)
	}
	oldPath = string(UnescapeChars([]byte(oldPath[1 : len(oldPath)-1])))
	newPath = string(UnescapeChars([]byte(newPath[1 : len(newPath)-1])))
	return oldPath, newPath, nil
}

// lastFieldOctal parses the last whitespace-separated field of line as an
// octal number. ok is false only when line has no fields at all.
func lastFieldOctal(line string) (mode uint64, ok bool) {
	fields := strings.Fields(line)
	if len(fields) == 0 {
		return 0, false
	}
	// Best effort: the value ParseUint returns for an invalid field is
	// passed on as is.
	mode, _ = strconv.ParseUint(fields[len(fields)-1], 8, 64)
	return mode, true
}
// parseSection parses one section of a file diff. p.buffer must hold the
// section header (e.g. "@@ -0,0 +1,3 @@"), which becomes the first line of
// the returned section.
//
// Following lines are consumed while they start with ' ', '+' or '-'; empty
// lines and the "\ No newline at end of file" marker are skipped. The first
// other line ends the section and is left in p.buffer for the caller.
//
// A header from which no starting line number can be extracted (for example
// the "@@@ ... @@@" header of a combined diff) yields an error.
func (p *Parser) parseSection() (*Section, error) {
	header := string(p.buffer)
	p.buffer = nil

	leftLine, rightLine, err := parseHunkStart(header)
	if err != nil {
		return nil, err
	}

	section := &Section{
		Lines: []*Line{
			{
				Type:    DiffLineSection,
				Content: header,
			},
		},
	}

	noNewline := []byte(`\ No newline at end of file`)
	for !p.isEOF {
		if err = p.readLine(); err != nil {
			return nil, err
		}

		if len(p.buffer) == 0 {
			p.buffer = nil
			continue
		}

		var entry *Line
		switch p.buffer[0] {
		case ' ':
			entry = &Line{Type: DiffLinePlain, LeftLine: leftLine, RightLine: rightLine}
			leftLine++
			rightLine++
		case '+':
			entry = &Line{Type: DiffLineAdd, RightLine: rightLine}
			section.numAdditions++
			rightLine++
		case '-':
			entry = &Line{Type: DiffLineDelete, LeftLine: leftLine}
			section.numDeletions++
			// The left counter only advances once it is positive.
			if leftLine > 0 {
				leftLine++
			}
		default:
			// No new line indicator
			if bytes.HasPrefix(p.buffer, noNewline) {
				p.buffer = nil
				continue
			}
			// We're no longer in the section; the line stays buffered.
			return section, nil
		}

		// Content keeps the leading marker character.
		entry.Content = string(p.buffer)
		p.buffer = nil
		section.Lines = append(section.Lines, entry)
	}

	return section, nil
}

// parseHunkStart extracts the starting left and right line numbers from a
// section header such as "@@ -0,0 +1,3 @@". When the header carries a single
// range, the right number equals the left one. It returns an error if the
// header has no range to read a number from.
func parseHunkStart(header string) (left, right int, err error) {
	parts := strings.Split(header, "@@")
	if len(parts) < 2 || parts[1] == "" {
		return 0, 0, fmt.Errorf("malformed section header %q", header)
	}

	// parts[1] is " -0,0 +1,3 "; drop the leading character and split ranges.
	ranges := strings.Split(parts[1][1:], " ")
	oldStart := strings.Split(ranges[0], ",")[0]
	if oldStart == "" {
		return 0, 0, fmt.Errorf("malformed section header %q", header)
	}

	// Best effort: a number that does not parse counts as 0.
	left, _ = strconv.Atoi(oldStart[1:])
	right = left
	if len(ranges) > 1 {
		right, _ = strconv.Atoi(strings.Split(ranges[1], ",")[0])
	}
	return left, right, nil
}
// Parse reads the whole diff and calls f once for every file found, in the
// order the files appear. A file is handed to f when the header of the next
// file is reached or the input ends, so its sections are complete by then.
// f may be nil, in which case the input is parsed and nothing is reported.
//
// Parsing stops at the first error, which is returned.
func (p *Parser) Parse(f func(f *File)) error {
	var (
		newSideMarker = []byte("+++ ")
		oldSideMarker = []byte("--- ")
		binaryMarker  = []byte("Binary")
	)

	// Starts out empty so that nothing is reported before the first header.
	file := new(File)

	for !p.isEOF {
		if err := p.readLine(); err != nil {
			return err
		}

		switch {
		case len(p.buffer) == 0,
			bytes.HasPrefix(p.buffer, newSideMarker),
			bytes.HasPrefix(p.buffer, oldSideMarker):
			p.buffer = nil

		// Found new file
		case bytes.HasPrefix(p.buffer, diffHead):
			// stream previous file
			if !file.IsEmpty() && f != nil {
				f(file)
			}
			next, err := p.parseFileHeader()
			if err != nil {
				return err
			}
			file = next

		case bytes.HasPrefix(p.buffer, binaryMarker):
			p.buffer = nil
			file.IsBinary = true

		// Skip everything up to the next section header.
		case p.buffer[0] != '@':
			p.buffer = nil

		default:
			section, err := p.parseSection()
			if err != nil {
				return err
			}
			file.Sections = append(file.Sections, section)
			file.numAdditions += section.numAdditions
			file.numDeletions += section.numDeletions
		}
	}

	// stream last file
	if !file.IsEmpty() && f != nil {
		f(file)
	}

	return nil
}
// UnescapeChars reverses escaped characters: the two-byte sequence `\\`
// becomes a single backslash and `\t` becomes a tab character.
//
// The input is decoded in one left-to-right pass, so an escaped backslash
// followed by the letter t (`\\t`) yields a backslash and a 't', not a tab.
// A backslash that does not start one of the two known sequences is copied
// unchanged. If in contains no backslash it is returned as is, without
// copying; otherwise a new slice is returned and in is left untouched.
func UnescapeChars(in []byte) []byte {
	if bytes.IndexByte(in, '\\') < 0 {
		return in
	}

	out := make([]byte, 0, len(in))
	for i := 0; i < len(in); i++ {
		c := in[i]
		if c == '\\' && i+1 < len(in) {
			switch in[i+1] {
			case '\\':
				i++ // skip the second backslash; c already is '\\'
			case 't':
				c = '\t'
				i++
			}
		}
		out = append(out, c)
	}
	return out
}