Basic work-in-progress, can pack basic files now

This commit is contained in:
Laurence Withers 2019-02-24 12:17:57 +00:00
parent 0fc26e5414
commit e864d0829e
7 changed files with 1611 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/cmd/htpacker/htpacker

306
cmd/htpacker/packer.go Normal file
View File

@ -0,0 +1,306 @@
package main
import (
"bufio"
"crypto/sha512"
"fmt"
"io/ioutil"
"net/http"
"os"
"os/exec"
"golang.org/x/sys/unix"
"github.com/foobaz/go-zopfli/zopfli"
"github.com/lwithers/htpack/internal/packed"
"github.com/lwithers/pkg/writefile"
)
// TODO: abandon packed version if no size saving
// BrotliPath is the command that is executed to produce the brotli-compressed
// variant of each file. Setting it to the empty string turns brotli
// compression off for every file.
var BrotliPath = "brotli"
// FilesToPack is the set of files to place into a pack. Each key is used as
// the key of the corresponding entry in the pack's directory; each value
// describes the input file stored under that key.
type FilesToPack map[string]FileToPack
// FileToPack describes a single input file and how it should be packed.
type FileToPack struct {
// Filename is the path of the input file; it is opened for reading when
// the file is packed.
Filename string `yaml:"filename"`
// ContentType is recorded as the file's content type. If it is empty, the
// content type is detected from the file data instead.
ContentType string `yaml:"content_type"`
// DisableCompression, if set, stores only the uncompressed data (neither
// the gzip nor the brotli variant is produced).
DisableCompression bool `yaml:"disable_compression"`
// DisableGzip, if set, skips the gzip variant.
DisableGzip bool `yaml:"disable_gzip"`
// DisableBrotli, if set, skips the brotli variant.
DisableBrotli bool `yaml:"disable_brotli"`
// NOTE(review): these unexported fields are not referenced by any of the
// packing code visible here — confirm whether they are still needed.
uncompressed, gzip, brotli packInfo
}
// packInfo holds a presence flag together with an offset and a length.
// NOTE(review): presumably intended to describe where one variant of a file
// was written in the pack; it is only used as a field type of FileToPack in
// the code visible here — confirm the intended use.
type packInfo struct {
present bool
offset, len uint64
}
// Pack writes every entry of filesToPack into a new pack file which is
// committed to outputFilename once it has been written completely.
//
// The output starts with a serialised packed.Header, followed by the data of
// each file, followed by the serialised packed.Directory (padded to a 4096
// byte boundary). The header is first written with placeholder values and is
// rewritten in place once the directory's offset and length are known.
//
// An error is returned if filesToPack is empty, if any input file cannot be
// packed, or if writing the output fails. On error the partially written
// output is aborted rather than committed.
func Pack(filesToPack FilesToPack, outputFilename string) error {
	// An empty directory serialises to zero bytes, which would leave the
	// header's DirectoryLength at zero. Zero-valued proto3 scalars are not
	// serialised, so the final header would be shorter than the placeholder
	// and the pack would be corrupt. Reject this before creating any output.
	if len(filesToPack) == 0 {
		return fmt.Errorf("no files to pack")
	}

	finalFname, outputFile, err := writefile.New(outputFilename)
	if err != nil {
		return err
	}
	defer writefile.Abort(outputFile)
	packer := &packWriter{f: outputFile}

	// Write the initial header. The non-zero placeholder offset/length keep
	// the serialised size identical to that of the final header.
	hdr := &packed.Header{
		Magic:           123,
		Version:         1,
		DirectoryOffset: 1,
		DirectoryLength: 1,
	}
	m, err := hdr.Marshal()
	if err != nil {
		return fmt.Errorf("marshalling placeholder header: %v", err)
	}
	hdrLen := len(m)
	if _, err = packer.Write(m); err != nil {
		return fmt.Errorf("writing placeholder header: %v", err)
	}

	// Pack each file's data, collecting the directory entries as we go.
	dir := packed.Directory{
		Files: make(map[string]*packed.File, len(filesToPack)),
	}
	for path, fileToPack := range filesToPack {
		entry, err := packOne(packer, fileToPack)
		if err != nil {
			return err
		}
		dir.Files[path] = &entry
	}

	// Write the directory after all of the file data.
	if m, err = dir.Marshal(); err != nil {
		return fmt.Errorf("marshalling directory: %v", err)
	}
	if err = packer.Pad(); err != nil {
		return fmt.Errorf("padding before directory: %v", err)
	}
	hdr.DirectoryOffset = packer.Pos()
	hdr.DirectoryLength = uint64(len(m))
	if _, err = packer.Write(m); err != nil {
		return fmt.Errorf("writing directory: %v", err)
	}

	// Rewrite the header at the start of the file with the real values.
	if m, err = hdr.Marshal(); err != nil {
		return fmt.Errorf("marshalling header: %v", err)
	}
	if len(m) != hdrLen {
		// Overwriting with a different size would leave stale placeholder
		// bytes behind or clobber whatever follows the header.
		return fmt.Errorf("header is %d bytes, placeholder was %d bytes",
			len(m), hdrLen)
	}
	if _, err = outputFile.WriteAt(m, 0); err != nil {
		return err
	}

	// All done.
	return writefile.Commit(finalFname, outputFile)
}
// packOne appends the data of a single input file to the pack and returns the
// directory entry describing it.
//
// The uncompressed data is always written. Unless disabled through
// fileToPack (or, for brotli, through an empty BrotliPath), a gzip and a
// brotli variant are written after it. Every variant starts on a 4096 byte
// boundary. An empty input file is stored uncompressed only, since there is
// nothing to compress.
//
// On error the returned entry must not be used.
func packOne(packer *packWriter, fileToPack FileToPack) (info packed.File, err error) {
	// implementation detail: write files at a page boundary
	if err = packer.Pad(); err != nil {
		return info, err
	}

	// open and mmap input file
	f, err := os.Open(fileToPack.Filename)
	if err != nil {
		return info, err
	}
	defer f.Close()

	fi, err := f.Stat()
	if err != nil {
		return info, err
	}
	size := fi.Size()
	if size < 0 || int64(int(size)) != size {
		err = fmt.Errorf("%s: size %d cannot be mapped into memory",
			fileToPack.Filename, size)
		return info, err
	}

	// mmap(2) rejects zero-length mappings with EINVAL, so an empty file is
	// represented by a nil slice instead of a mapping.
	var data []byte
	if size > 0 {
		data, err = unix.Mmap(int(f.Fd()), 0, int(size),
			unix.PROT_READ, unix.MAP_SHARED)
		if err != nil {
			err = fmt.Errorf("mmap %s: %v", fileToPack.Filename, err)
			return info, err
		}
		defer unix.Munmap(data)
	}

	info.Etag = etag(data)
	info.ContentType = fileToPack.ContentType
	if info.ContentType == "" {
		info.ContentType = http.DetectContentType(data)
	}

	// copy the uncompressed version
	fileData := &packed.FileData{
		Offset: packer.Pos(),
		Length: uint64(len(data)),
	}
	if _, err = packer.CopyFrom(f); err != nil {
		err = fmt.Errorf("copying %s into pack: %v",
			fileToPack.Filename, err)
		return info, err
	}
	info.Uncompressed = fileData

	if fileToPack.DisableCompression || len(data) == 0 {
		return info, nil
	}

	// gzip compression
	if !fileToPack.DisableGzip {
		if err = packer.Pad(); err != nil {
			return info, err
		}
		fileData = &packed.FileData{
			Offset: packer.Pos(),
		}
		fileData.Length, err = packOneGzip(packer, data)
		if err != nil {
			return info, err
		}
		info.Gzip = fileData
	}

	// brotli compression
	if BrotliPath != "" && !fileToPack.DisableBrotli {
		if err = packer.Pad(); err != nil {
			return info, err
		}
		fileData = &packed.FileData{
			Offset: packer.Pos(),
		}
		fileData.Length, err = packOneBrotli(packer, fileToPack.Filename)
		if err != nil {
			return info, err
		}
		info.Brotli = fileData
	}

	return info, nil
}
// etag derives the entity tag for a file's contents: the SHA-384 digest of in
// rendered as lowercase hex, prefixed with "1--" and wrapped in double quotes.
func etag(in []byte) string {
	digest := sha512.Sum384(in)
	return fmt.Sprintf(`"1--%x"`, digest[:])
}
// packOneGzip compresses data into gzip format with zopfli and appends the
// result to the pack, returning the number of bytes that were appended.
//
// The compressed stream is staged in a temporary file, which is removed again
// before returning, and is then copied into the pack from there.
func packOneGzip(packer *packWriter, data []byte) (uint64, error) {
	// stage the compressed output in a scratch file
	scratch, err := ioutil.TempFile("", "")
	if err != nil {
		return 0, err
	}
	defer os.Remove(scratch.Name())
	defer scratch.Close()

	// Inputs above 10MiB use 5 iterations rather than zopfli's default
	// (presumably to keep the compression time bounded).
	const largeInput = 10 << 20
	options := zopfli.DefaultOptions()
	if len(data) > largeInput {
		options.NumIterations = 5
	}

	// compress, making sure everything buffered reaches the scratch file
	w := bufio.NewWriter(scratch)
	err = zopfli.GzipCompress(&options, data, w)
	if err == nil {
		err = w.Flush()
	}
	if err != nil {
		return 0, err
	}

	// copy into packfile
	return packer.CopyFrom(scratch)
}
// packOneBrotli compresses the file named filename by running the BrotliPath
// command and appends the result to the pack, returning the number of bytes
// that were appended.
//
// The command writes into a temporary file, which is removed again before
// returning. If the command fails, the returned error includes whatever the
// command printed so that the failure can be diagnosed.
func packOneBrotli(packer *packWriter, filename string) (uint64, error) {
	// write via temporary file
	tmpfile, err := ioutil.TempFile("", "")
	if err != nil {
		return 0, err
	}
	defer os.Remove(tmpfile.Name())
	defer tmpfile.Close()

	// compress via commandline
	cmd := exec.Command(BrotliPath, "--input", filename,
		"--output", tmpfile.Name())
	if out, err := cmd.CombinedOutput(); err != nil {
		return 0, fmt.Errorf("running %s on %s: %v (output: %q)",
			BrotliPath, filename, err, out)
	}

	// copy into packfile
	return packer.CopyFrom(tmpfile)
}
// packWriter wraps the output file of a pack and remembers the first error
// encountered: once err is set, Write, Pad and CopyFrom return it without
// touching the file again.
type packWriter struct {
// f is the output file being written.
f *os.File
// err is the remembered error, or nil if no operation has failed so far.
err error
}
// Write appends buf to the output file at its current position. If an earlier
// operation has already failed, nothing is written and that earlier error is
// returned with a count of zero. The outcome of the write is remembered for
// subsequent calls.
func (pw *packWriter) Write(buf []byte) (int, error) {
	var n int
	if pw.err == nil {
		n, pw.err = pw.f.Write(buf)
	}
	return n, pw.err
}
// Pos reports the current position in the output file. If the position cannot
// be determined, the failure is remembered in pw.err (to be reported by a
// later operation) and whatever value the failed seek returned is passed on.
func (pw *packWriter) Pos() uint64 {
	cur, seekErr := pw.f.Seek(0, os.SEEK_CUR)
	if seekErr == nil {
		return uint64(cur)
	}
	pw.err = seekErr
	return uint64(cur)
}
// Pad advances the output position to the next multiple of 4096 bytes, doing
// nothing if it is already aligned. The position is moved by seeking; no
// padding bytes are written. If an earlier operation has already failed, that
// error is returned and the file is left alone; a failed seek is remembered
// and returned.
func (pw *packWriter) Pad() error {
	if pw.err != nil {
		return pw.err
	}

	cur, err := pw.f.Seek(0, os.SEEK_CUR)
	if err != nil {
		pw.err = err
		return err
	}

	const pageSize = 4096
	if rem := cur & (pageSize - 1); rem != 0 {
		// skip forward over the remainder of the current page
		_, pw.err = pw.f.Seek(pageSize-rem, os.SEEK_CUR)
	}
	return pw.err
}
// CopyFrom appends the entire contents of in (as sized by Stat at the time of
// the call) to the output file using sendfile, reading in from its start
// regardless of its current file position. It returns the number of bytes
// appended.
//
// If an earlier operation has already failed, that error is returned and
// nothing is copied. Any failure here is remembered for subsequent calls; the
// returned count is then the number of bytes copied before the failure.
func (pw *packWriter) CopyFrom(in *os.File) (uint64, error) {
	if pw.err != nil {
		return 0, pw.err
	}

	fi, err := in.Stat()
	if err != nil {
		pw.err = err
		return 0, pw.err
	}

	// copy in chunks of at most 1GiB per sendfile call
	const maxChunk = 1 << 30
	size := fi.Size()
	var copied int64
	for copied < size {
		chunk := size - copied
		if chunk > maxChunk {
			chunk = maxChunk
		}

		// Linux sendfile advances *offset itself, so it is handed a
		// scratch copy and our own count stays the single source of
		// truth. Advancing the same variable a second time would skip
		// data and double the reported length.
		off := copied
		n, err := unix.Sendfile(int(pw.f.Fd()), int(in.Fd()), &off,
			int(chunk))
		if n > 0 {
			// the count may be negative when an error is returned
			copied += int64(n)
		}
		if err == unix.EINTR {
			// interrupted by a signal; retry from where we got to
			continue
		}
		if err != nil {
			pw.err = err
			return uint64(copied), pw.err
		}
		if n <= 0 {
			// No progress and no error: the input is shorter than Stat
			// reported (e.g. it was truncated). Fail rather than loop
			// forever.
			pw.err = fmt.Errorf("%s: input ended after %d of %d bytes",
				in.Name(), copied, size)
			return uint64(copied), pw.err
		}
	}

	return uint64(copied), nil
}

View File

@ -0,0 +1,30 @@
package main
import (
"fmt"
"io/ioutil"
"os"
yaml "gopkg.in/yaml.v2"
)
// main runs the packer; on failure it prints the error to standard error and
// exits with status 1.
func main() {
	err := run()
	if err == nil {
		return
	}
	fmt.Fprintln(os.Stderr, err)
	os.Exit(1)
}
// run reads the list of files to pack from "in.yaml" in the current directory
// (decoded strictly as a FilesToPack) and packs them into "out.htpack". It
// returns the first error encountered.
func run() error {
	contents, err := ioutil.ReadFile("in.yaml")
	if err != nil {
		return err
	}

	var spec FilesToPack
	err = yaml.UnmarshalStrict(contents, &spec)
	if err != nil {
		return err
	}

	return Pack(spec, "out.htpack")
}

8
go.mod
View File

@ -1 +1,9 @@
module github.com/lwithers/htpack
require (
github.com/foobaz/go-zopfli v0.0.0-20140122214029-7432051485e2
github.com/gogo/protobuf v1.2.1
github.com/lwithers/pkg v1.2.1
golang.org/x/sys v0.0.0-20180924175946-90868a75fefd
gopkg.in/yaml.v2 v2.2.2
)

14
go.sum Normal file
View File

@ -0,0 +1,14 @@
github.com/foobaz/go-zopfli v0.0.0-20140122214029-7432051485e2 h1:VA6jElpcJ+wkwEBufbnVkSBCA2TEnxdRppjRT5Kvh0A=
github.com/foobaz/go-zopfli v0.0.0-20140122214029-7432051485e2/go.mod h1:Yi95+RbwKz7uGndSuUhoq7LJKh8qH8DT9fnL4ewU30k=
github.com/gogo/protobuf v1.2.1 h1:/s5zKNz0uPFCZ5hddgPdo2TK2TVrUNMn0OOX8/aZMTE=
github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4=
github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/lwithers/pkg v1.2.1 h1:KNnZFGv0iyduc+uUF5UB8vDyr2ofRq930cVKqrpQulY=
github.com/lwithers/pkg v1.2.1/go.mod h1:0CRdDnVCqIa5uaIs1u8Gmwl3M7sm181QmSmVVaPTZUo=
golang.org/x/sys v0.0.0-20180924175946-90868a75fefd h1:ELJRxcWg6//yYBDjuf/SnMg1+X0jj5+BP5xXF31wl4w=
golang.org/x/sys v0.0.0-20180924175946-90868a75fefd/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=

1193
internal/packed/packed.pb.go Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,59 @@
syntax = "proto3";
package packed;
// Header at start of file. This must be a fixed, known size, so every field
// uses a fixed-width type and no field may be zero (proto3 does not serialise
// zero-valued scalar fields, which would change the encoded size).
message Header {
// Magic number, used to quickly detect misconfigured systems or
// corrupted files.
fixed64 magic = 1;
// Version of the file format.
fixed64 version = 2;
// DirectoryOffset is the byte offset from the start of the file at
// which the Directory object may be found.
fixed64 directory_offset = 3;
// DirectoryLength is the byte length of the serialised Directory
// object.
fixed64 directory_length = 4;
}
// Directory of available files. It is stored at the position recorded in the
// Header's directory_offset and directory_length fields.
message Directory {
// Files available within this pack. The key is the path of the URL to
// serve, and the value describes the file associated with that path.
map<string, File> files = 1;
}
// File that can be served. The same content may be stored in up to three
// encodings; each FileData field locates one of them within the pack.
message File {
// ContentType of the file, copied directly into the "Content-Type" header.
string content_type = 1;
// Etag of the file (includes double quotes). Remembered by the browser
// and used to preempt responses if it is unmodified between resource get
// requests.
string etag = 2;
// Uncompressed version of the file.
FileData uncompressed = 3;
// Gzip compressed version of the file. Unset if no gzip version was
// stored.
FileData gzip = 4;
// Brotli compressed version of the file. Unset if no brotli version was
// stored.
FileData brotli = 5;
}
// FileData records the position of the file data within the pack.
message FileData {
// Offset is the start of the file data, in bytes relative to the start of
// the pack.
fixed64 offset = 1;
// Length is the number of bytes of file data stored at Offset.
fixed64 length = 2;
}