2019-03-13 21:55:31 +00:00
|
|
|
package packer
|
2019-02-24 12:17:57 +00:00
|
|
|
|
|
|
|
import (
|
|
|
|
"bufio"
|
|
|
|
"crypto/sha512"
|
|
|
|
"fmt"
|
|
|
|
"io/ioutil"
|
|
|
|
"net/http"
|
|
|
|
"os"
|
|
|
|
"os/exec"
|
|
|
|
|
|
|
|
"golang.org/x/sys/unix"
|
|
|
|
|
|
|
|
"github.com/foobaz/go-zopfli/zopfli"
|
|
|
|
"github.com/lwithers/htpack/internal/packed"
|
|
|
|
"github.com/lwithers/pkg/writefile"
|
|
|
|
)
|
|
|
|
|
|
|
|
var BrotliPath string = "brotli"
|
|
|
|
|
|
|
|
type FilesToPack map[string]FileToPack
|
|
|
|
|
|
|
|
type FileToPack struct {
|
|
|
|
Filename string `yaml:"filename"`
|
|
|
|
ContentType string `yaml:"content_type"`
|
|
|
|
DisableCompression bool `yaml:"disable_compression"`
|
|
|
|
DisableGzip bool `yaml:"disable_gzip"`
|
|
|
|
DisableBrotli bool `yaml:"disable_brotli"`
|
|
|
|
|
|
|
|
uncompressed, gzip, brotli packInfo
|
|
|
|
}
|
|
|
|
|
|
|
|
type packInfo struct {
|
|
|
|
present bool
|
|
|
|
offset, len uint64
|
|
|
|
}
|
|
|
|
|
2019-03-13 21:55:31 +00:00
|
|
|
const (
|
|
|
|
// minCompressionSaving means we'll only use the compressed version of
|
|
|
|
// the file if it's at least this many bytes smaller than the original.
|
|
|
|
// Chosen somewhat arbitrarily; we have to add an HTTP header, and the
|
|
|
|
// decompression overhead is not zero.
|
|
|
|
minCompressionSaving = 128
|
|
|
|
|
|
|
|
// minCompressionFraction means we'll only use the compressed version of
|
|
|
|
// the file if it's at least (origSize>>minCompressionFraction) bytes
|
|
|
|
// smaller than the original. This is a guess at when the decompression
|
|
|
|
// overhead outweighs the time saved in transmission.
|
|
|
|
minCompressionFraction = 7 // i.e. files must be at least 1/128 smaller
|
|
|
|
)
|
|
|
|
|
2019-03-05 18:41:02 +00:00
|
|
|
// Pack a file.
|
2019-02-24 12:17:57 +00:00
|
|
|
func Pack(filesToPack FilesToPack, outputFilename string) error {
|
|
|
|
finalFname, outputFile, err := writefile.New(outputFilename)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
defer writefile.Abort(outputFile)
|
|
|
|
packer := &packWriter{f: outputFile}
|
|
|
|
|
|
|
|
// write initial header (will rewrite offset/length when known)
|
|
|
|
hdr := &packed.Header{
|
2019-03-05 18:41:02 +00:00
|
|
|
Magic: packed.Magic,
|
|
|
|
Version: packed.VersionInitial,
|
2019-02-24 12:17:57 +00:00
|
|
|
DirectoryOffset: 1,
|
|
|
|
DirectoryLength: 1,
|
|
|
|
}
|
|
|
|
m, _ := hdr.Marshal()
|
|
|
|
packer.Write(m)
|
|
|
|
|
|
|
|
dir := packed.Directory{
|
|
|
|
Files: make(map[string]*packed.File),
|
|
|
|
}
|
|
|
|
|
|
|
|
for path, fileToPack := range filesToPack {
|
|
|
|
info, err := packOne(packer, fileToPack)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
dir.Files[path] = &info
|
|
|
|
}
|
|
|
|
|
|
|
|
// write the directory
|
|
|
|
if m, err = dir.Marshal(); err != nil {
|
2019-03-13 21:55:31 +00:00
|
|
|
err = fmt.Errorf("marshaling directory object: %v", err)
|
2019-02-24 12:17:57 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
packer.Pad()
|
|
|
|
hdr.DirectoryOffset = packer.Pos()
|
|
|
|
hdr.DirectoryLength = uint64(len(m))
|
|
|
|
if _, err := packer.Write(m); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// write header at start of file
|
|
|
|
m, _ = hdr.Marshal()
|
|
|
|
if _, err = outputFile.WriteAt(m, 0); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// all done!
|
|
|
|
return writefile.Commit(finalFname, outputFile)
|
|
|
|
}
|
|
|
|
|
|
|
|
func packOne(packer *packWriter, fileToPack FileToPack) (info packed.File, err error) {
|
|
|
|
// implementation detail: write files at a page boundary
|
|
|
|
if err = packer.Pad(); err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// open and mmap input file
|
|
|
|
f, err := os.Open(fileToPack.Filename)
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
defer f.Close()
|
|
|
|
|
|
|
|
fi, err := f.Stat()
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
data, err := unix.Mmap(int(f.Fd()), 0, int(fi.Size()),
|
|
|
|
unix.PROT_READ, unix.MAP_SHARED)
|
|
|
|
if err != nil {
|
2019-03-13 21:55:31 +00:00
|
|
|
err = fmt.Errorf("mmap %s: %v", fileToPack.Filename, err)
|
2019-02-24 12:17:57 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
defer unix.Munmap(data)
|
|
|
|
|
|
|
|
info.Etag = etag(data)
|
|
|
|
info.ContentType = fileToPack.ContentType
|
|
|
|
if info.ContentType == "" {
|
|
|
|
info.ContentType = http.DetectContentType(data)
|
|
|
|
}
|
|
|
|
|
|
|
|
// copy the uncompressed version
|
|
|
|
fileData := &packed.FileData{
|
|
|
|
Offset: packer.Pos(),
|
|
|
|
Length: uint64(len(data)),
|
|
|
|
}
|
2019-03-13 21:55:31 +00:00
|
|
|
if _, err = packer.CopyFrom(f, fi); err != nil {
|
2019-02-24 12:17:57 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
info.Uncompressed = fileData
|
|
|
|
|
|
|
|
if fileToPack.DisableCompression {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// gzip compression
|
|
|
|
if !fileToPack.DisableGzip {
|
|
|
|
if err = packer.Pad(); err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
fileData = &packed.FileData{
|
|
|
|
Offset: packer.Pos(),
|
|
|
|
}
|
2019-03-13 21:55:31 +00:00
|
|
|
fileData.Length, err = packOneGzip(packer, data,
|
|
|
|
info.Uncompressed.Length)
|
2019-02-24 12:17:57 +00:00
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
2019-03-13 21:55:31 +00:00
|
|
|
if fileData.Length > 0 {
|
|
|
|
info.Gzip = fileData
|
|
|
|
}
|
2019-02-24 12:17:57 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// brotli compression
|
|
|
|
if BrotliPath != "" && !fileToPack.DisableBrotli {
|
|
|
|
if err = packer.Pad(); err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
fileData = &packed.FileData{
|
|
|
|
Offset: packer.Pos(),
|
|
|
|
}
|
2019-03-13 21:55:31 +00:00
|
|
|
fileData.Length, err = packOneBrotli(packer,
|
|
|
|
fileToPack.Filename, info.Uncompressed.Length)
|
2019-02-24 12:17:57 +00:00
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
2019-03-13 21:55:31 +00:00
|
|
|
if fileData.Length > 0 {
|
|
|
|
info.Brotli = fileData
|
|
|
|
}
|
2019-02-24 12:17:57 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
func etag(in []byte) string {
|
|
|
|
h := sha512.New384()
|
|
|
|
h.Write(in)
|
|
|
|
return fmt.Sprintf(`"1--%x"`, h.Sum(nil))
|
|
|
|
}
|
|
|
|
|
2019-03-13 21:55:31 +00:00
|
|
|
func packOneGzip(packer *packWriter, data []byte, uncompressedSize uint64,
|
|
|
|
) (uint64, error) {
|
2019-02-24 12:17:57 +00:00
|
|
|
// write via temporary file
|
|
|
|
tmpfile, err := ioutil.TempFile("", "")
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
defer os.Remove(tmpfile.Name())
|
|
|
|
defer tmpfile.Close()
|
|
|
|
|
|
|
|
// compress
|
|
|
|
opts := zopfli.DefaultOptions()
|
|
|
|
if len(data) > (10 << 20) { // 10MiB
|
|
|
|
opts.NumIterations = 5
|
|
|
|
}
|
|
|
|
|
|
|
|
buf := bufio.NewWriter(tmpfile)
|
|
|
|
if err = zopfli.GzipCompress(&opts, data, buf); err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
if err = buf.Flush(); err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// copy into packfile
|
2019-03-13 21:55:31 +00:00
|
|
|
return packer.CopyIfSaving(tmpfile, uncompressedSize)
|
2019-02-24 12:17:57 +00:00
|
|
|
}
|
|
|
|
|
2019-03-13 21:55:31 +00:00
|
|
|
func packOneBrotli(packer *packWriter, filename string, uncompressedSize uint64,
|
|
|
|
) (uint64, error) {
|
2019-02-24 12:17:57 +00:00
|
|
|
// write via temporary file
|
|
|
|
tmpfile, err := ioutil.TempFile("", "")
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
defer os.Remove(tmpfile.Name())
|
|
|
|
defer tmpfile.Close()
|
|
|
|
|
|
|
|
// compress via commandline
|
|
|
|
cmd := exec.Command(BrotliPath, "--input", filename,
|
|
|
|
"--output", tmpfile.Name())
|
|
|
|
out, err := cmd.CombinedOutput()
|
|
|
|
if err != nil {
|
2019-03-13 21:55:31 +00:00
|
|
|
err = fmt.Errorf("brotli: %v (process reported: %s)", err, out)
|
2019-02-24 12:17:57 +00:00
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// copy into packfile
|
2019-03-13 21:55:31 +00:00
|
|
|
return packer.CopyIfSaving(tmpfile, uncompressedSize)
|
2019-02-24 12:17:57 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
type packWriter struct {
|
|
|
|
f *os.File
|
|
|
|
err error
|
|
|
|
}
|
|
|
|
|
|
|
|
func (pw *packWriter) Write(buf []byte) (int, error) {
|
|
|
|
if pw.err != nil {
|
|
|
|
return 0, pw.err
|
|
|
|
}
|
|
|
|
n, err := pw.f.Write(buf)
|
|
|
|
pw.err = err
|
|
|
|
return n, err
|
|
|
|
}
|
|
|
|
|
|
|
|
func (pw *packWriter) Pos() uint64 {
|
|
|
|
pos, err := pw.f.Seek(0, os.SEEK_CUR)
|
|
|
|
if err != nil {
|
|
|
|
pw.err = err
|
|
|
|
}
|
|
|
|
return uint64(pos)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (pw *packWriter) Pad() error {
|
|
|
|
if pw.err != nil {
|
|
|
|
return pw.err
|
|
|
|
}
|
|
|
|
|
|
|
|
pos, err := pw.f.Seek(0, os.SEEK_CUR)
|
|
|
|
if err != nil {
|
|
|
|
pw.err = err
|
|
|
|
return pw.err
|
|
|
|
}
|
|
|
|
|
|
|
|
pos &= 0xFFF
|
|
|
|
if pos == 0 {
|
|
|
|
return pw.err
|
|
|
|
}
|
|
|
|
|
|
|
|
if _, err = pw.f.Seek(4096-pos, os.SEEK_CUR); err != nil {
|
|
|
|
pw.err = err
|
|
|
|
}
|
|
|
|
return pw.err
|
|
|
|
}
|
|
|
|
|
2019-03-13 21:55:31 +00:00
|
|
|
func (pw *packWriter) CopyIfSaving(in *os.File, uncompressedSize uint64) (uint64, error) {
|
2019-02-24 12:17:57 +00:00
|
|
|
if pw.err != nil {
|
|
|
|
return 0, pw.err
|
|
|
|
}
|
|
|
|
|
|
|
|
fi, err := in.Stat()
|
|
|
|
if err != nil {
|
|
|
|
pw.err = err
|
|
|
|
return 0, pw.err
|
|
|
|
}
|
2019-03-13 21:55:31 +00:00
|
|
|
sz := uint64(fi.Size())
|
2019-02-24 12:17:57 +00:00
|
|
|
|
2019-03-13 21:55:31 +00:00
|
|
|
if sz+minCompressionSaving > uncompressedSize {
|
|
|
|
return 0, nil
|
|
|
|
}
|
|
|
|
if sz+(uncompressedSize>>minCompressionFraction) > uncompressedSize {
|
|
|
|
return 0, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
return pw.CopyFrom(in, fi)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (pw *packWriter) CopyFrom(in *os.File, fi os.FileInfo) (uint64, error) {
|
|
|
|
if pw.err != nil {
|
|
|
|
return 0, pw.err
|
|
|
|
}
|
2019-03-05 18:41:02 +00:00
|
|
|
|
2019-02-24 12:17:57 +00:00
|
|
|
var off int64
|
|
|
|
remain := fi.Size()
|
|
|
|
for remain > 0 {
|
|
|
|
var amt int
|
|
|
|
if remain > (1 << 30) {
|
|
|
|
amt = (1 << 30)
|
|
|
|
} else {
|
|
|
|
amt = int(remain)
|
|
|
|
}
|
|
|
|
|
2019-03-13 21:55:31 +00:00
|
|
|
amt, err := unix.Sendfile(int(pw.f.Fd()), int(in.Fd()), &off, amt)
|
2019-02-24 12:17:57 +00:00
|
|
|
remain -= int64(amt)
|
|
|
|
if err != nil {
|
2019-03-13 21:55:31 +00:00
|
|
|
pw.err = fmt.Errorf("sendfile (copying data to "+
|
|
|
|
"htpack): %v", err)
|
2019-02-24 12:17:57 +00:00
|
|
|
return uint64(off), pw.err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return uint64(off), nil
|
|
|
|
}
|