Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 26c705f7 authored by Sasha Smundak's avatar Sasha Smundak
Browse files

A tool to facilitate large ninja files comparison.

Bug: 201713929
Test: internal
Change-Id: Ifd976eed1e58b7409e3deacf99917206f0149ade
parent 9885f2f7
Loading
Loading
Loading
Loading
+151 −0
Original line number Diff line number Diff line
# Ninja File Canonicalizer

Suppose we have a tool that generates a Ninja file from some other description (think Kati and makefiles), and during
the testing we discovered a regression. Furthermore, suppose that the generated Ninja file is large (think millions of
lines). And, the new Ninja file has build statements and rules in a slightly different order. As the tool generates the
rule names, the real differences in the output of the `diff` command are drowned in noise. Enter Canoninja.

Canoninja renames each Ninja rule to the hash of its contents. After that, we can just sort the build statements, and a
simple `comm` command immediately reveal the essential difference between the files.

## Example

Consider the following makefile

```makefile
second :=
first: foo
foo:
	@echo foo
second: bar
bar:
	@echo bar
```

Depending on Kati version converting it to Ninja file will yield either:

```
$ cat /tmp/1.ninja
# Generated by kati 06f2569b2d16628608c000a76e3d495a5a5528cb

pool local_pool
 depth = 72

build _kati_always_build_: phony

build first: phony foo
rule rule0
 description = build $out
 command = /bin/sh -c "echo foo"
build foo: rule0
build second: phony bar
rule rule1
 description = build $out
 command = /bin/sh -c "echo bar"
build bar: rule1

default first
```

or

```
$ cat 2.ninja
# Generated by kati 371194da71b3e191fea6f2ccceb7b061bd0de310

pool local_pool
 depth = 72

build _kati_always_build_: phony

build second: phony bar
rule rule0
 description = build $out
 command = /bin/sh -c "echo bar"
build bar: rule0
build first: phony foo
rule rule1
 description = build $out
 command = /bin/sh -c "echo foo"
build foo: rule1

default first
```

This is a quirk in Kati, see https://github.com/google/kati/issues/238

Trying to find out the difference between the targets even after sorting them isn't too helpful:

```
diff <(grep '^build' /tmp/1.ninja|sort) <(grep '^build' /tmp/2.ninja | sort)
1c1
< build bar: rule1
---
> build bar: rule0
3c3
< build foo: rule0
---
> build foo: rule1
```

However, running these files through `canoninja` yields

```
$ canoninja /tmp/1.ninja
# Generated by kati 06f2569b2d16628608c000a76e3d495a5a5528cb

pool local_pool
 depth = 72

build _kati_always_build_: phony

build first: phony foo
rule R2f9981d3c152fc255370dc67028244f7bed72a03
 description = build $out
 command = /bin/sh -c "echo foo"
build foo: R2f9981d3c152fc255370dc67028244f7bed72a03
build second: phony bar
rule R62640f3f9095cf2da5b9d9e2a82f746cc710c94c
 description = build $out
 command = /bin/sh -c "echo bar"
build bar: R62640f3f9095cf2da5b9d9e2a82f746cc710c94c

default first
```

and

```
~/go/bin/canoninja /tmp/2.ninja
# Generated by kati 371194da71b3e191fea6f2ccceb7b061bd0de310

pool local_pool
 depth = 72

build _kati_always_build_: phony

build second: phony bar
rule R62640f3f9095cf2da5b9d9e2a82f746cc710c94c
 description = build $out
 command = /bin/sh -c "echo bar"
build bar: R62640f3f9095cf2da5b9d9e2a82f746cc710c94c
build first: phony foo
rule R2f9981d3c152fc255370dc67028244f7bed72a03
 description = build $out
 command = /bin/sh -c "echo foo"
build foo: R2f9981d3c152fc255370dc67028244f7bed72a03

default first
```

and when we extract only build statements and sort them, we see that both Ninja files define the same graph:

```shell
$ diff <(~/go/bin/canoninja /tmp/1.ninja | grep '^build' | sort) \
       <(~/go/bin/canoninja /tmp/2.ninja | grep '^build' | sort)
```

# Todo

* Optionally output only the build statements, optionally sorted
* Handle continuation lines correctly
+130 −0
Original line number Diff line number Diff line
package canoninja

import (
	"bytes"
	"crypto/sha1"
	"encoding/hex"
	"fmt"
	"io"
)

var (
	rulePrefix  = []byte("rule ")
	buildPrefix = []byte("build ")
	phonyRule   = []byte("phony")
)

func Generate(path string, buffer []byte, sink io.Writer) error {
	// Break file into lines
	from := 0
	var lines [][]byte
	for from < len(buffer) {
		line := getLine(buffer[from:])
		lines = append(lines, line)
		from += len(line)
	}

	// FOr each rule, calculate and remember its digest
	ruleDigest := make(map[string]string)
	for i := 0; i < len(lines); {
		if bytes.HasPrefix(lines[i], rulePrefix) {
			// Find ruleName
			rn := ruleName(lines[i])
			if len(rn) == 0 {
				return fmt.Errorf("%s:%d: rule name is missing or on the next line", path, i+1)
			}
			sRuleName := string(rn)
			if _, ok := ruleDigest[sRuleName]; ok {
				return fmt.Errorf("%s:%d: the rule %s has been already defined", path, i+1, sRuleName)
			}
			// Calculate rule text digest as a digests of line digests.
			var digests []byte
			doDigest := func(b []byte) {
				h := sha1.New()
				h.Write(b)
				digests = h.Sum(digests)

			}
			// For the first line, digest everything after rule's name
			doDigest(lines[i][cap(lines[i])+len(rn)-cap(rn):])
			for i++; i < len(lines) && lines[i][0] == ' '; i++ {
				doDigest(lines[i])
			}
			h := sha1.New()
			h.Write(digests)
			ruleDigest[sRuleName] = "R" + hex.EncodeToString(h.Sum(nil))

		} else {
			i++
		}
	}

	// Rewrite rule names.
	for i, line := range lines {
		if bytes.HasPrefix(line, buildPrefix) {
			brn := getBuildRuleName(line)
			if bytes.Equal(brn, phonyRule) {
				sink.Write(line)
				continue
			}
			if len(brn) == 0 {
				return fmt.Errorf("%s:%d: build statement lacks rule name", path, i+1)
			}
			sink.Write(line[0 : cap(line)-cap(brn)])
			if digest, ok := ruleDigest[string(brn)]; ok {
				sink.Write([]byte(digest))
			} else {
				return fmt.Errorf("%s:%d: no rule for this build target", path, i+1)
			}
			sink.Write(line[cap(line)+len(brn)-cap(brn):])
		} else if bytes.HasPrefix(line, rulePrefix) {
			rn := ruleName(line)
			// Write everything before it
			sink.Write(line[0 : cap(line)-cap(rn)])
			sink.Write([]byte(ruleDigest[string(rn)]))
			sink.Write(line[cap(line)+len(rn)-cap(rn):])
		} else {
			//goland:noinspection GoUnhandledErrorResult
			sink.Write(line)
		}
	}
	return nil
}

func getLine(b []byte) []byte {
	if n := bytes.IndexByte(b, '\n'); n >= 0 {
		return b[:n+1]
	}
	return b
}

// Returns build statement's rule name
func getBuildRuleName(line []byte) []byte {
	n := bytes.IndexByte(line, ':')
	if n <= 0 {
		return nil
	}
	ruleName := line[n+1:]
	if ruleName[0] == ' ' {
		ruleName = bytes.TrimLeft(ruleName, " ")
	}
	if n := bytes.IndexAny(ruleName, " \t\r\n"); n >= 0 {
		ruleName = ruleName[0:n]
	}
	return ruleName
}

// Returns rule statement's rule name
func ruleName(lineAfterRule []byte) []byte {
	ruleName := lineAfterRule[len(rulePrefix):]
	if len(ruleName) == 0 {
		return ruleName
	}
	if ruleName[0] == ' ' {
		ruleName = bytes.TrimLeft(ruleName, " ")
	}
	if n := bytes.IndexAny(ruleName, " \t\r\n"); n >= 0 {
		ruleName = ruleName[0:n]
	}
	return ruleName
}
+47 −0
Original line number Diff line number Diff line
package canoninja

import (
	"bytes"
	"testing"
)

func TestGenerate(t *testing.T) {
	tests := []struct {
		name     string
		in       []byte
		wantSink string
		wantErr  bool
	}{
		{
			name: "1",
			in: []byte(`
rule rule1
  abcd
rule rule2
  abcd
build x: rule1
`),
			wantSink: `
rule R9c97aba7f61994be6862f5ea9a62d26130c7f48b
  abcd
rule R9c97aba7f61994be6862f5ea9a62d26130c7f48b
  abcd
build x: R9c97aba7f61994be6862f5ea9a62d26130c7f48b
`,
			wantErr: false,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			sink := &bytes.Buffer{}
			err := Generate("<file>", tt.in, sink)
			if (err != nil) != tt.wantErr {
				t.Errorf("Generate() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			if gotSink := sink.String(); gotSink != tt.wantSink {
				t.Errorf("Generate() gotSink = %v, want %v", gotSink, tt.wantSink)
			}
		})
	}
}
+36 −0
Original line number Diff line number Diff line
package main

/*
   Canoninja reads a Ninja file and changes the rule names to be the digest of the rule contents.
   Feed  it to a filter that extracts only build statements, sort them, and you will have a crude
   but effective tool to find small differences between two Ninja files.
*/

import (
	"canoninja"
	"flag"
	"fmt"
	"os"
)

func main() {
	flag.Parse()
	files := flag.Args()
	if len(files) == 0 {
		files = []string{"/dev/stdin"}
	}
	rc := 0
	for _, f := range files {
		if buffer, err := os.ReadFile(f); err == nil {
			err = canoninja.Generate(f, buffer, os.Stdout)
			if err != nil {
				fmt.Fprintln(os.Stderr, err)
				rc = 1
			}
		} else {
			fmt.Fprintf(os.Stderr, "%s: %s\n", f, err)
			rc = 1
		}
	}
	os.Exit(rc)
}

tools/canoninja/go.mod

0 → 100644
+1 −0
Original line number Diff line number Diff line
module canoninja