summaryrefslogtreecommitdiffstats
path: root/fs/pipe.c
diff options
context:
space:
mode:
authorVladimir Davydov <vdavydov@virtuozzo.com>2016-07-26 15:24:33 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2016-07-26 16:19:19 -0700
commitd86133bd396f5e4a8d5629c1b853b574de4faf32 (patch)
treec4e669aa8662ea6973b67ae72fcdb7e0597ed548 /fs/pipe.c
parent3e79ec7ddc33e5c69c20ce7f768d0e5c8b824f69 (diff)
downloadlinux-0-day-d86133bd396f5e4a8d5629c1b853b574de4faf32.tar.gz
linux-0-day-d86133bd396f5e4a8d5629c1b853b574de4faf32.tar.xz
pipe: account to kmemcg
Pipes can consume a significant amount of system memory, hence they should be accounted to kmemcg. This patch marks pipe_inode_info and anonymous pipe buffer page allocations as __GFP_ACCOUNT so that they would be charged to kmemcg. Note, since a pipe buffer page can be "stolen" and get reused for other purposes, including mapping to userspace, we clear PageKmemcg thus resetting page->_mapcount and uncharge it in anon_pipe_buf_steal, which is introduced by this patch. A note regarding anon_pipe_buf_steal implementation. We allow to steal the page if its ref count equals 1. It looks racy, but it is correct for anonymous pipe buffer pages, because: - We lock out all other pipe users, because ->steal is called with pipe_lock held, so the page can't be spliced to another pipe from under us. - The page is not on LRU and it never was. - Thus a parallel thread can access it only by PFN. Although this is quite possible (e.g. see page_idle_get_page and balloon_page_isolate) this is not dangerous, because all such functions do is increase page ref count, check if the page is the one they are looking for, and decrease ref count if it isn't. Since our page is clean except for PageKmemcg mark, which doesn't conflict with other _mapcount users, the worst that can happen is we see page_count > 2 due to a transient ref, in which case we false-positively abort ->steal, which is still fine, because ->steal is not guaranteed to succeed. Link: http://lkml.kernel.org/r/20160527150313.GD26059@esperanza Signed-off-by: Vladimir Davydov <vdavydov@virtuozzo.com> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Michal Hocko <mhocko@kernel.org> Cc: Eric Dumazet <eric.dumazet@gmail.com> Cc: Minchan Kim <minchan@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/pipe.c')
-rw-r--r--fs/pipe.c32
1 files changed, 26 insertions, 6 deletions
diff --git a/fs/pipe.c b/fs/pipe.c
index 0d3f5165cb0b8..4b32928f54266 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -21,6 +21,7 @@
#include <linux/audit.h>
#include <linux/syscalls.h>
#include <linux/fcntl.h>
+#include <linux/memcontrol.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
@@ -137,6 +138,22 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
put_page(page);
}
+static int anon_pipe_buf_steal(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
+{
+ struct page *page = buf->page;
+
+ if (page_count(page) == 1) {
+ if (memcg_kmem_enabled()) {
+ memcg_kmem_uncharge(page, 0);
+ __ClearPageKmemcg(page);
+ }
+ __SetPageLocked(page);
+ return 0;
+ }
+ return 1;
+}
+
/**
* generic_pipe_buf_steal - attempt to take ownership of a &pipe_buffer
* @pipe: the pipe that the buffer belongs to
@@ -219,7 +236,7 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = {
.can_merge = 1,
.confirm = generic_pipe_buf_confirm,
.release = anon_pipe_buf_release,
- .steal = generic_pipe_buf_steal,
+ .steal = anon_pipe_buf_steal,
.get = generic_pipe_buf_get,
};
@@ -227,7 +244,7 @@ static const struct pipe_buf_operations packet_pipe_buf_ops = {
.can_merge = 0,
.confirm = generic_pipe_buf_confirm,
.release = anon_pipe_buf_release,
- .steal = generic_pipe_buf_steal,
+ .steal = anon_pipe_buf_steal,
.get = generic_pipe_buf_get,
};
@@ -405,7 +422,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
int copied;
if (!page) {
- page = alloc_page(GFP_HIGHUSER);
+ page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
if (unlikely(!page)) {
ret = ret ? : -ENOMEM;
break;
@@ -611,7 +628,7 @@ struct pipe_inode_info *alloc_pipe_info(void)
{
struct pipe_inode_info *pipe;
- pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
+ pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL_ACCOUNT);
if (pipe) {
unsigned long pipe_bufs = PIPE_DEF_BUFFERS;
struct user_struct *user = get_current_user();
@@ -619,7 +636,9 @@ struct pipe_inode_info *alloc_pipe_info(void)
if (!too_many_pipe_buffers_hard(user)) {
if (too_many_pipe_buffers_soft(user))
pipe_bufs = 1;
- pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * pipe_bufs, GFP_KERNEL);
+ pipe->bufs = kcalloc(pipe_bufs,
+ sizeof(struct pipe_buffer),
+ GFP_KERNEL_ACCOUNT);
}
if (pipe->bufs) {
@@ -1010,7 +1029,8 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages)
if (nr_pages < pipe->nrbufs)
return -EBUSY;
- bufs = kcalloc(nr_pages, sizeof(*bufs), GFP_KERNEL | __GFP_NOWARN);
+ bufs = kcalloc(nr_pages, sizeof(*bufs),
+ GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
if (unlikely(!bufs))
return -ENOMEM;