Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 3bd9c7d3 authored by Treehugger Robot's avatar Treehugger Robot Committed by Gerrit Code Review
Browse files

Merge "A tool to facilitate large ninja files comparison."

parents f478b848 26c705f7
Loading
Loading
Loading
Loading
+151 −0
Original line number Diff line number Diff line
# Ninja File Canonicalizer

Suppose we have a tool that generates a Ninja file from some other description (think Kati and makefiles), and during
the testing we discovered a regression. Furthermore, suppose that the generated Ninja file is large (think millions of
lines). And, the new Ninja file has build statements and rules in a slightly different order. As the tool generates the
rule names, the real differences in the output of the `diff` command are drowned in noise. Enter Canoninja.

Canoninja renames each Ninja rule to the hash of its contents. After that, we can just sort the build statements, and a
simple `comm` command immediately reveal the essential difference between the files.

## Example

Consider the following makefile

```makefile
second :=
first: foo
foo:
	@echo foo
second: bar
bar:
	@echo bar
```

Depending on Kati version converting it to Ninja file will yield either:

```
$ cat /tmp/1.ninja
# Generated by kati 06f2569b2d16628608c000a76e3d495a5a5528cb

pool local_pool
 depth = 72

build _kati_always_build_: phony

build first: phony foo
rule rule0
 description = build $out
 command = /bin/sh -c "echo foo"
build foo: rule0
build second: phony bar
rule rule1
 description = build $out
 command = /bin/sh -c "echo bar"
build bar: rule1

default first
```

or

```
$ cat 2.ninja
# Generated by kati 371194da71b3e191fea6f2ccceb7b061bd0de310

pool local_pool
 depth = 72

build _kati_always_build_: phony

build second: phony bar
rule rule0
 description = build $out
 command = /bin/sh -c "echo bar"
build bar: rule0
build first: phony foo
rule rule1
 description = build $out
 command = /bin/sh -c "echo foo"
build foo: rule1

default first
```

This is a quirk in Kati, see https://github.com/google/kati/issues/238

Trying to find out the difference between the targets even after sorting them isn't too helpful:

```
diff <(grep '^build' /tmp/1.ninja|sort) <(grep '^build' /tmp/2.ninja | sort)
1c1
< build bar: rule1
---
> build bar: rule0
3c3
< build foo: rule0
---
> build foo: rule1
```

However, running these files through `canoninja` yields

```
$ canoninja /tmp/1.ninja
# Generated by kati 06f2569b2d16628608c000a76e3d495a5a5528cb

pool local_pool
 depth = 72

build _kati_always_build_: phony

build first: phony foo
rule R2f9981d3c152fc255370dc67028244f7bed72a03
 description = build $out
 command = /bin/sh -c "echo foo"
build foo: R2f9981d3c152fc255370dc67028244f7bed72a03
build second: phony bar
rule R62640f3f9095cf2da5b9d9e2a82f746cc710c94c
 description = build $out
 command = /bin/sh -c "echo bar"
build bar: R62640f3f9095cf2da5b9d9e2a82f746cc710c94c

default first
```

and

```
~/go/bin/canoninja /tmp/2.ninja
# Generated by kati 371194da71b3e191fea6f2ccceb7b061bd0de310

pool local_pool
 depth = 72

build _kati_always_build_: phony

build second: phony bar
rule R62640f3f9095cf2da5b9d9e2a82f746cc710c94c
 description = build $out
 command = /bin/sh -c "echo bar"
build bar: R62640f3f9095cf2da5b9d9e2a82f746cc710c94c
build first: phony foo
rule R2f9981d3c152fc255370dc67028244f7bed72a03
 description = build $out
 command = /bin/sh -c "echo foo"
build foo: R2f9981d3c152fc255370dc67028244f7bed72a03

default first
```

and when we extract only build statements and sort them, we see that both Ninja files define the same graph:

```shell
$ diff <(~/go/bin/canoninja /tmp/1.ninja | grep '^build' | sort) \
       <(~/go/bin/canoninja /tmp/2.ninja | grep '^build' | sort)
```

# Todo

* Optionally output only the build statements, optionally sorted
* Handle continuation lines correctly
+130 −0
Original line number Diff line number Diff line
package canoninja

import (
	"bytes"
	"crypto/sha1"
	"encoding/hex"
	"fmt"
	"io"
)

var (
	rulePrefix  = []byte("rule ")
	buildPrefix = []byte("build ")
	phonyRule   = []byte("phony")
)

func Generate(path string, buffer []byte, sink io.Writer) error {
	// Break file into lines
	from := 0
	var lines [][]byte
	for from < len(buffer) {
		line := getLine(buffer[from:])
		lines = append(lines, line)
		from += len(line)
	}

	// FOr each rule, calculate and remember its digest
	ruleDigest := make(map[string]string)
	for i := 0; i < len(lines); {
		if bytes.HasPrefix(lines[i], rulePrefix) {
			// Find ruleName
			rn := ruleName(lines[i])
			if len(rn) == 0 {
				return fmt.Errorf("%s:%d: rule name is missing or on the next line", path, i+1)
			}
			sRuleName := string(rn)
			if _, ok := ruleDigest[sRuleName]; ok {
				return fmt.Errorf("%s:%d: the rule %s has been already defined", path, i+1, sRuleName)
			}
			// Calculate rule text digest as a digests of line digests.
			var digests []byte
			doDigest := func(b []byte) {
				h := sha1.New()
				h.Write(b)
				digests = h.Sum(digests)

			}
			// For the first line, digest everything after rule's name
			doDigest(lines[i][cap(lines[i])+len(rn)-cap(rn):])
			for i++; i < len(lines) && lines[i][0] == ' '; i++ {
				doDigest(lines[i])
			}
			h := sha1.New()
			h.Write(digests)
			ruleDigest[sRuleName] = "R" + hex.EncodeToString(h.Sum(nil))

		} else {
			i++
		}
	}

	// Rewrite rule names.
	for i, line := range lines {
		if bytes.HasPrefix(line, buildPrefix) {
			brn := getBuildRuleName(line)
			if bytes.Equal(brn, phonyRule) {
				sink.Write(line)
				continue
			}
			if len(brn) == 0 {
				return fmt.Errorf("%s:%d: build statement lacks rule name", path, i+1)
			}
			sink.Write(line[0 : cap(line)-cap(brn)])
			if digest, ok := ruleDigest[string(brn)]; ok {
				sink.Write([]byte(digest))
			} else {
				return fmt.Errorf("%s:%d: no rule for this build target", path, i+1)
			}
			sink.Write(line[cap(line)+len(brn)-cap(brn):])
		} else if bytes.HasPrefix(line, rulePrefix) {
			rn := ruleName(line)
			// Write everything before it
			sink.Write(line[0 : cap(line)-cap(rn)])
			sink.Write([]byte(ruleDigest[string(rn)]))
			sink.Write(line[cap(line)+len(rn)-cap(rn):])
		} else {
			//goland:noinspection GoUnhandledErrorResult
			sink.Write(line)
		}
	}
	return nil
}

func getLine(b []byte) []byte {
	if n := bytes.IndexByte(b, '\n'); n >= 0 {
		return b[:n+1]
	}
	return b
}

// Returns build statement's rule name
func getBuildRuleName(line []byte) []byte {
	n := bytes.IndexByte(line, ':')
	if n <= 0 {
		return nil
	}
	ruleName := line[n+1:]
	if ruleName[0] == ' ' {
		ruleName = bytes.TrimLeft(ruleName, " ")
	}
	if n := bytes.IndexAny(ruleName, " \t\r\n"); n >= 0 {
		ruleName = ruleName[0:n]
	}
	return ruleName
}

// Returns rule statement's rule name
func ruleName(lineAfterRule []byte) []byte {
	ruleName := lineAfterRule[len(rulePrefix):]
	if len(ruleName) == 0 {
		return ruleName
	}
	if ruleName[0] == ' ' {
		ruleName = bytes.TrimLeft(ruleName, " ")
	}
	if n := bytes.IndexAny(ruleName, " \t\r\n"); n >= 0 {
		ruleName = ruleName[0:n]
	}
	return ruleName
}
+47 −0
Original line number Diff line number Diff line
package canoninja

import (
	"bytes"
	"testing"
)

func TestGenerate(t *testing.T) {
	tests := []struct {
		name     string
		in       []byte
		wantSink string
		wantErr  bool
	}{
		{
			name: "1",
			in: []byte(`
rule rule1
  abcd
rule rule2
  abcd
build x: rule1
`),
			wantSink: `
rule R9c97aba7f61994be6862f5ea9a62d26130c7f48b
  abcd
rule R9c97aba7f61994be6862f5ea9a62d26130c7f48b
  abcd
build x: R9c97aba7f61994be6862f5ea9a62d26130c7f48b
`,
			wantErr: false,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			sink := &bytes.Buffer{}
			err := Generate("<file>", tt.in, sink)
			if (err != nil) != tt.wantErr {
				t.Errorf("Generate() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			if gotSink := sink.String(); gotSink != tt.wantSink {
				t.Errorf("Generate() gotSink = %v, want %v", gotSink, tt.wantSink)
			}
		})
	}
}
+36 −0
Original line number Diff line number Diff line
package main

/*
   Canoninja reads a Ninja file and changes the rule names to be the digest of the rule contents.
   Feed  it to a filter that extracts only build statements, sort them, and you will have a crude
   but effective tool to find small differences between two Ninja files.
*/

import (
	"canoninja"
	"flag"
	"fmt"
	"os"
)

func main() {
	flag.Parse()
	files := flag.Args()
	if len(files) == 0 {
		files = []string{"/dev/stdin"}
	}
	rc := 0
	for _, f := range files {
		if buffer, err := os.ReadFile(f); err == nil {
			err = canoninja.Generate(f, buffer, os.Stdout)
			if err != nil {
				fmt.Fprintln(os.Stderr, err)
				rc = 1
			}
		} else {
			fmt.Fprintf(os.Stderr, "%s: %s\n", f, err)
			rc = 1
		}
	}
	os.Exit(rc)
}

tools/canoninja/go.mod

0 → 100644
+1 −0
Original line number Diff line number Diff line
module canoninja