vmtouch可以方便地控制和诊断文件在系统Cache中的情况

官方文档在https://hoytech.com/vmtouch/

先大概翻译一下.

例子

例子1

查看/bin/目录下有多少(页)数据在Cache中

$ vmtouch /bin/
           Files: 92
     Directories: 1
  Resident Pages: 348/1307  1M/5M  26.6%
         Elapsed: 0.003426 seconds

例子2

查看big-dataset.txt文件有多少(页)在Cache中

$ vmtouch -v big-dataset.txt
big-dataset.txt
[                                                            ] 0/42116

           Files: 1
     Directories: 0
  Resident Pages: 0/42116  0/164M  0%
         Elapsed: 0.005182 seconds

一点也没有, 现在我们tail一下, 把部分数据“加载”到Cache

$ tail -n 10000 big-dataset.txt > /dev/null

$ vmtouch -v big-dataset.txt
big-dataset.txt
[                                                    oOOOOOOO] 4950/42116

           Files: 1
     Directories: 0
  Resident Pages: 4950/42116  19M/164M  11.8%
         Elapsed: 0.006706 seconds

例子3

让我们把big-dataset.txt文件中的所有数据都加载到内存中

$ vmtouch -vt big-dataset.txt
big-dataset.txt
[OOo                                                 oOOOOOOO] 6887/42116
[OOOOOOOOo                                           oOOOOOOO] 10631/42116
[OOOOOOOOOOOOOOo                                     oOOOOOOO] 15351/42116
[OOOOOOOOOOOOOOOOOOOOOo                              oOOOOOOO] 19719/42116
[OOOOOOOOOOOOOOOOOOOOOOOOOOOo                        oOOOOOOO] 24183/42116
[OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOo                  oOOOOOOO] 28615/42116
[OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOo              oOOOOOOO] 31415/42116
[OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOo      oOOOOOOO] 36775/42116
[OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOo  oOOOOOOO] 39431/42116
[OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO] 42116/42116

           Files: 1
     Directories: 0
   Touched Pages: 42116 (164M)
         Elapsed: 12.107 seconds

例子4

我们有三个数据文件 a.txt, b.txt, c.txt , 但内存只能放下其中两个. 现在 a.txt b.txt在内存了. 我们读取c.txt的时候可能会把a,b两个中的任意一个踢掉. 我们现在希望手工把a.txt从内存中踢掉.

$ vmtouch -ve a.txt
Evicting a.txt

           Files: 1
     Directories: 0
   Evicted Pages: 42116 (164M)
         Elapsed: 0.076824 seconds

例子5

后台运行, 把目录下的文件Lock到物理内存, 以防被踢掉.

vmtouch -dl /var/www/htdocs/critical/

What is it good for?

这几个例子已经可以让我们看到, vmtouch能做什么啦.

  • Discovering which files your OS is caching (这个好像有些不方便吧, 好像有另外的工具可以更快的做到这点)
  • Telling the OS to cache or evict certain files or regions of files
  • Locking files into memory so the OS won’t evict them
  • Preserving virtual memory profile when failing over servers
  • Keeping a “hot-standby” file-server
  • Plotting filesystem cache usage over time
  • Maintaining “soft quotas” of cache usage
  • Speeding up batch/cron jobs
  • And much more…

翻译完了 谈谈实现

代码就在github, 只有一个c文件, 不到1000行.
核心代码在vmtouch_file这个函数中.
核心中的核心只在三个系统调用 mmap mincore msync

  • 首先要调用mmap用来得到内存地址入口, 映射到要处理的文件的内容.
  • pagesize = sysconf(_SC_PAGESIZE) 获取系统的页大小, 文件大小除以pagesize(向上取整)就是文件一共占多少页内存.
  • vmtouch -t , 用来把数据加载到Cache, 其实就是从头遍历每一页内存, 读取一下(只读第一个字节就够了)
  • vmtouch -e , 把数据从Cache中清掉, 实现是调用了msync系统调用, 第三个参数设置为MS_INVALIDATE
  • 查询Cache占用情况, 用的是mincore系统调用, 它会填充一个[]byte数组, 数组长度等于page总数, 每一个字节代表对应那一页的情况.

      MINCORE_INCORE           0x1     /* Page is incore */
      MINCORE_REFERENCED       0x2     /* Page has been referenced by us */
      MINCORE_MODIFIED         0x4     /* Page has been modified by us */
      MINCORE_REFERENCED_OTHER 0x8     /* Page has been referenced */
      MINCORE_MODIFIED_OTHER  0x10     /* Page has been modified */
    

简单的golang实现示例

package main

import (
	"syscall"
	"unsafe"

	"golang.org/x/sys/unix"

	"github.com/golang/glog"
)

// bytes2pages returns the number of pages of size pagesize needed to hold
// bytes bytes; a partially filled trailing page counts as a whole page.
func bytes2pages(bytes int64, pagesize int) int64 {
	pageLen := int64(pagesize)
	roundedUp := bytes + pageLen - 1
	return roundedUp / pageLen
}

// takeToCahce reads the first byte of every page-sized stride of data and
// returns those bytes in page order.
//
// When data is a file mapping, touching one byte per page is what pulls the
// page into the cache. The bytes read are returned only so that the reads
// cannot be optimized away; callers may ignore the result.
//
// An empty data yields a nil slice.
func takeToCahce(data []byte) []byte {
	if len(data) == 0 {
		return nil
	}

	pagesize := syscall.Getpagesize()
	// The page count is known up front (a partial trailing page still counts),
	// so size the result once instead of growing it on every append.
	pages := (len(data) + pagesize - 1) / pagesize

	touched := make([]byte, 0, pages)
	for off := 0; off < len(data); off += pagesize {
		touched = append(touched, data[off])
	}
	return touched
}

// evict calls msync(2) with MS_INVALIDATE on the memory region backing data,
// asking the kernel to invalidate cached copies of the mapped file.
//
// data is expected to be a slice returned by mmap. The call goes through the
// unix.Msync wrapper rather than a raw syscall, so no unsafe pointer
// arithmetic is needed and an empty slice does not panic on &data[0].
//
// Any msync failure is fatal and terminates the program via glog.Fatalf.
//
// NOTE(review): whether MS_INVALIDATE actually drops pages from the cache is
// platform-dependent — confirm on the target OS.
func evict(data []byte) {
	if err := unix.Msync(data, unix.MS_INVALIDATE); err != nil {
		glog.Fatalf("msync error:%s", err.Error())
	}
}

// checkIfInCache queries mincore(2) for the memory region backing data and
// logs "<resident>/<total>" pages at Info level. At verbosity 10 it also logs
// the name of every status flag set for each page.
//
// data is expected to be a slice returned by mmap. An empty data is reported
// as 0/0 without calling mincore. A mincore failure is fatal and terminates
// the program via glog.Fatalf.
func checkIfInCache(data []byte) {
	// Per-page status bits in the order they are reported. Only the low bit
	// (page resident) is counted.
	// NOTE(review): the higher bits follow the BSD-style MINCORE_* values;
	// confirm they are meaningful on the target OS.
	const incore = 0x1
	pageFlags := []struct {
		mask byte
		name string
	}{
		{incore, "MINCORE_INCORE"},
		{0x2, "MINCORE_REFERENCED"},
		{0x4, "MINCORE_MODIFIED"},
		{0x8, "MINCORE_REFERENCED_OTHER"},
		{0x10, "MINCORE_MODIFIED_OTHER"},
	}

	pagesize := syscall.Getpagesize()
	pagesInRange := bytes2pages(int64(len(data)), pagesize)

	resident := 0
	if len(data) == 0 {
		// Nothing is mapped; taking &data[0] below would panic.
		glog.Infof("%d/%d", resident, pagesInRange)
		return
	}

	// mincore fills one status byte per page of the queried range.
	vec := make([]byte, pagesInRange)
	r, _, errno := unix.Syscall(
		unix.SYS_MINCORE,
		uintptr(unsafe.Pointer(&data[0])),
		uintptr(len(data)),
		uintptr(unsafe.Pointer(&vec[0])),
	)
	if r != 0 {
		glog.Fatalf("mincore error:%s", errno.Error())
	}

	for _, status := range vec {
		if status&incore != 0 {
			resident++
		}
		for _, f := range pageFlags {
			// A masked bit is either zero or the mask value itself, so test
			// against zero; comparing with 1 only ever matches the low bit.
			if status&f.mask != 0 {
				glog.V(10).Info(f.name)
			}
		}
	}

	glog.Infof("%d/%d", resident, pagesInRange)
}

// main demonstrates the vmtouch workflow on the file named "a" in the working
// directory: it maps the file, reports how many of its pages are cached,
// touches every page, reports again, asks the kernel to evict the pages, and
// reports a final time.
//
// Any failure to open, stat or map the file is fatal.
func main() {
	const (
		filename       = "a"
		offset   int64 = 0
	)

	// glog buffers its output; flush on the normal exit path so the final
	// messages are not lost. (glog.Fatalf flushes on its own before exiting.)
	defer glog.Flush()

	fd, err := syscall.Open(filename, syscall.O_RDWR, 0)
	if err != nil {
		glog.Fatalf("could not open %s:%s", filename, err)
	}
	defer func() {
		if cerr := syscall.Close(fd); cerr != nil {
			glog.Warningf("could not close %s:%s", filename, cerr)
		}
	}()
	glog.Infof("fd:%d", fd)

	var stat syscall.Stat_t
	if err = syscall.Fstat(fd, &stat); err != nil {
		glog.Fatalf("could not stat %s:%s", filename, err)
	}

	// Get File Size
	fileLen := stat.Size
	glog.Infof("file size:%d", fileLen)

	data, err := syscall.Mmap(fd, offset, int(fileLen), syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_SHARED)
	if err != nil {
		glog.Fatalf("could not mmap %s:%s", filename, err)
	}
	defer func() {
		if uerr := syscall.Munmap(data); uerr != nil {
			glog.Warningf("could not munmap %s:%s", filename, uerr)
		}
	}()
	glog.Infof("data length:%d", len(data))

	glog.Info("original stat:")
	checkIfInCache(data)

	glog.Info("take into cache")
	takeToCahce(data)

	checkIfInCache(data)

	glog.Info("evict from cache")
	evict(data)

	checkIfInCache(data)
}