tun: windows: don't spin unless we really need it
author    Jason A. Donenfeld <Jason@zx2c4.com>
Sun, 18 Aug 2019 09:49:37 +0000 (11:49 +0200)
committer Jason A. Donenfeld <Jason@zx2c4.com>
Mon, 19 Aug 2019 08:12:50 +0000 (10:12 +0200)
tun/tun_windows.go

index b0faed89736e3fdc96d0df434c10c1812f8827b2..a66f7097ea92b90a3cbd40a718fb1e7fa411ffd9 100644
@@ -19,11 +19,14 @@ import (
 )
 
 const (
-       packetAlignment    uint32 = 4        // Number of bytes packets are aligned to in rings
-       packetSizeMax             = 0xffff   // Maximum packet size
-       packetCapacity            = 0x800000 // Ring capacity, 8MiB
-       packetTrailingSize        = uint32(unsafe.Sizeof(packetHeader{})) + ((packetSizeMax + (packetAlignment - 1)) &^ (packetAlignment - 1)) - packetAlignment
-       ioctlRegisterRings        = (51820 << 16) | (0x970 << 2) | 0 /*METHOD_BUFFERED*/ | (0x3 /*FILE_READ_DATA | FILE_WRITE_DATA*/ << 14)
+       packetAlignment            = 4        // Number of bytes packets are aligned to in rings
+       packetSizeMax              = 0xffff   // Maximum packet size
+       packetCapacity             = 0x800000 // Ring capacity, 8MiB
+       packetTrailingSize         = uint32(unsafe.Sizeof(packetHeader{})) + ((packetSizeMax + (packetAlignment - 1)) &^ (packetAlignment - 1)) - packetAlignment
+       ioctlRegisterRings         = (51820 << 16) | (0x970 << 2) | 0 /*METHOD_BUFFERED*/ | (0x3 /*FILE_READ_DATA | FILE_WRITE_DATA*/ << 14)
+       rateMeasurementGranularity = uint64((time.Second / 2) / time.Nanosecond)
+       spinloopRateThreshold      = 800000000 / 8                                   // 800mbps
+       spinloopDuration           = uint64(time.Millisecond / 80 / time.Nanosecond) // ~1gbit/s
 )
 
 type packetHeader struct {
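
For orientation, the three new constants work out to: rateMeasurementGranularity = 500,000,000 ns (the 0.5 s rate window), spinloopRateThreshold = 100,000,000 bytes/s (800 Mbit/s), and spinloopDuration = 12,500 ns (~12.5 µs of spinning before blocking). A minimal standalone sketch (not part of the patch) that evaluates them:

package main

import (
	"fmt"
	"time"
)

// Standalone sketch: evaluates the three tuning constants added above so
// their magnitudes are easy to verify.
func main() {
	rateMeasurementGranularity := uint64((time.Second / 2) / time.Nanosecond)
	spinloopRateThreshold := uint64(800000000 / 8)
	spinloopDuration := uint64(time.Millisecond / 80 / time.Nanosecond)

	fmt.Println(rateMeasurementGranularity) // 500000000: rate window is re-sampled every 0.5 s
	fmt.Println(spinloopRateThreshold)      // 100000000 bytes/s, i.e. 800 Mbit/s
	fmt.Println(spinloopDuration)           // 12500 ns: spin for at most ~12.5 µs before blocking
}
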
@@ -50,6 +53,13 @@ type ringDescriptor struct {
        }
 }
 
+type rateJuggler struct {
+       current       uint64
+       nextByteCount uint64
+       nextStartTime int64
+       changing      int32
+}
+
 type NativeTun struct {
        wt        *wintun.Wintun
        handle    windows.Handle
@@ -58,8 +68,15 @@ type NativeTun struct {
        events    chan Event
        errors    chan error
        forcedMTU int
+       rate      rateJuggler
 }
 
+//go:linkname procyield runtime.procyield
+func procyield(cycles uint32)
+
+//go:linkname nanotime runtime.nanotime
+func nanotime() int64
+
 func packetAlign(size uint32) uint32 {
        return (size + (packetAlignment - 1)) &^ (packetAlignment - 1)
 }
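
Worth noting: the patch moves the existing procyield linkname up beside a new one, nanotime. Both bind private runtime functions: nanotime reads the runtime's raw monotonic clock (cheaper than time.Now(), which also derives wall-clock time), and procyield issues a short PAUSE-style CPU busy wait. A sketch of the same trick in isolation; this assumes the package also contains an empty .s file (e.g. empty.s) so the compiler accepts the body-less declaration, and linking against runtime internals can break between Go releases:

package main

import (
	"fmt"
	_ "unsafe" // required for go:linkname
)

// Pull-mode linkname: bind the runtime's monotonic clock directly.
// An empty .s file must exist in the package for this to compile.
//go:linkname nanotime runtime.nanotime
func nanotime() int64

func main() {
	start := nanotime()
	// ... hot path ...
	fmt.Println("elapsed ns:", nanotime()-start)
}
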
@@ -184,9 +201,6 @@ func (tun *NativeTun) ForceMTU(mtu int) {
        tun.forcedMTU = mtu
 }
 
-//go:linkname procyield runtime.procyield
-func procyield(cycles uint32)
-
 // Note: Read() and Write() assume the caller comes only from a single thread; there's no locking.
 
 func (tun *NativeTun) Read(buff []byte, offset int) (int, error) {
@@ -205,7 +219,8 @@ retry:
                return 0, os.ErrClosed
        }
 
-       start := time.Now()
+       start := nanotime()
+       shouldSpin := atomic.LoadUint64(&tun.rate.current) >= spinloopRateThreshold && uint64(start-atomic.LoadInt64(&tun.rate.nextStartTime)) <= rateMeasurementGranularity*2
        var buffTail uint32
        for {
                buffTail = atomic.LoadUint32(&tun.rings.send.ring.tail)
@@ -215,7 +230,7 @@ retry:
                if tun.close {
                        return 0, os.ErrClosed
                }
-               if time.Since(start) >= time.Millisecond/80 /* ~1gbit/s */ {
+               if !shouldSpin || uint64(nanotime()-start) >= spinloopDuration {
                        windows.WaitForSingleObject(tun.rings.send.tailMoved, windows.INFINITE)
                        goto retry
                }
@@ -243,6 +258,7 @@ retry:
        copy(buff[offset:], packet.data[:packet.size])
        buffHead = tun.rings.send.ring.wrap(buffHead + alignedPacketSize)
        atomic.StoreUint32(&tun.rings.send.ring.head, buffHead)
+       tun.rate.update(uint64(packet.size))
        return int(packet.size), nil
 }
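
Taken together, the Read-path changes replace the old unconditional ~12.5 µs spin with an adaptive wait: spin only if the published rate is above spinloopRateThreshold and the sample is fresh (within two measurement windows), and even then only until the spin budget runs out, after which the thread parks on the kernel event. A self-contained sketch of the pattern with illustrative names (a Go channel stands in for the Windows event handle; this is not wireguard-go API):

package main

import (
	"fmt"
	"runtime"
	"sync/atomic"
	"time"
)

const (
	spinBudget    = 12500 * time.Nanosecond // mirrors spinloopDuration
	rateThreshold = uint64(800000000 / 8)   // mirrors spinloopRateThreshold
)

// awaitWork busy-polls `ready` only while the observed rate suggests more
// data is imminent; otherwise it parks on notify (standing in for
// WaitForSingleObject). Illustrative only.
func awaitWork(ready *uint32, rate *uint64, notify <-chan struct{}) {
	start := time.Now()
	shouldSpin := atomic.LoadUint64(rate) >= rateThreshold
	for {
		if atomic.LoadUint32(ready) != 0 {
			return // data arrived while polling
		}
		if !shouldSpin || time.Since(start) >= spinBudget {
			<-notify // cheap for the CPU, slower to wake than spinning
			return
		}
		runtime.Gosched() // portable stand-in for procyield's CPU pause
	}
}

func main() {
	var ready uint32
	var rate uint64
	notify := make(chan struct{}, 1)
	go func() {
		time.Sleep(time.Millisecond)
		atomic.StoreUint32(&ready, 1)
		notify <- struct{}{}
	}()
	awaitWork(&ready, &rate, notify)
	fmt.Println("work available")
}
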
 
@@ -256,6 +272,7 @@ func (tun *NativeTun) Write(buff []byte, offset int) (int, error) {
        }
 
        packetSize := uint32(len(buff) - offset)
+       tun.rate.update(uint64(packetSize))
        alignedPacketSize := packetAlign(uint32(unsafe.Sizeof(packetHeader{})) + packetSize)
 
        buffHead := atomic.LoadUint32(&tun.rings.receive.ring.head)
@@ -292,3 +309,18 @@ func (tun *NativeTun) LUID() uint64 {
 func (rb *ring) wrap(value uint32) uint32 {
        return value & (packetCapacity - 1)
 }
+
+func (rate *rateJuggler) update(packetLen uint64) {
+       now := nanotime()
+       total := atomic.AddUint64(&rate.nextByteCount, packetLen)
+       period := uint64(now - atomic.LoadInt64(&rate.nextStartTime))
+       if period >= rateMeasurementGranularity {
+               if !atomic.CompareAndSwapInt32(&rate.changing, 0, 1) {
+                       return
+               }
+               atomic.StoreInt64(&rate.nextStartTime, now)
+               atomic.StoreUint64(&rate.current, total*uint64(time.Second/time.Nanosecond)/period)
+               atomic.StoreUint64(&rate.nextByteCount, 0)
+               atomic.StoreInt32(&rate.changing, 0)
+       }
+}
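
The rateJuggler is a small lock-free bytes-per-second estimator: every packet adds its length to nextByteCount, and once rateMeasurementGranularity (0.5 s) has elapsed, one caller wins the compare-and-swap on `changing`, publishes total * 1e9 / period into `current`, and resets the window; losing callers simply return, since their bytes were already counted by the atomic add. A hedged usage sketch within the same package (illustrative only, not in the patch):

// Illustrative only: drive a rateJuggler and read the estimate it
// publishes once the 0.5 s window has elapsed.
func exampleRate() {
	var r rateJuggler
	r.update(0) // first call publishes ~0 and initializes nextStartTime to "now"
	stop := nanotime() + int64(rateMeasurementGranularity) + int64(time.Millisecond/time.Nanosecond)
	for nanotime() < stop {
		r.update(1500) // e.g. one MTU-sized packet per call
	}
	// The first update() past the window published bytes/sec into r.current.
	println(atomic.LoadUint64(&r.current), "bytes/sec")
}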