Merge remote-tracking branch 'upstream/next' into fix-event-handling

This commit is contained in:
Monsterovich
2023-06-17 01:25:12 +02:00
28 changed files with 976 additions and 204 deletions

View File

@@ -1,5 +1,6 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright (c) Yuxuan Shui <yshuiv7@gmail.com>
#include <inttypes.h>
#include <xcb/sync.h>
#include <xcb/xcb.h>
@@ -81,7 +82,10 @@ void handle_device_reset(session_t *ps) {
}
/// paint all windows
void paint_all_new(session_t *ps, struct managed_win *t, bool ignore_damage) {
void paint_all_new(session_t *ps, struct managed_win *t) {
struct timespec now = get_time_timespec();
auto paint_all_start_us =
(uint64_t)now.tv_sec * 1000000UL + (uint64_t)now.tv_nsec / 1000;
if (ps->backend_data->ops->device_status &&
ps->backend_data->ops->device_status(ps->backend_data) != DEVICE_STATUS_NORMAL) {
return handle_device_reset(ps);
@@ -96,16 +100,17 @@ void paint_all_new(session_t *ps, struct managed_win *t, bool ignore_damage) {
ps->xsync_exists = false;
}
}
now = get_time_timespec();
auto after_sync_fence_us =
(uint64_t)now.tv_sec * 1000000UL + (uint64_t)now.tv_nsec / 1000;
log_trace("Time spent on sync fence: %" PRIu64 " us",
after_sync_fence_us - paint_all_start_us);
// All painting will be limited to the damage, if _some_ of
// the paints bleed out of the damage region, it will destroy
// part of the image we want to reuse
region_t reg_damage;
if (!ignore_damage) {
reg_damage = get_damage(ps, ps->o.monitor_repaint || !ps->o.use_damage);
} else {
pixman_region32_init(&reg_damage);
pixman_region32_copy(&reg_damage, &ps->screen_reg);
}
reg_damage = get_damage(ps, ps->o.monitor_repaint || !ps->o.use_damage);
if (!pixman_region32_not_empty(&reg_damage)) {
pixman_region32_fini(&reg_damage);
@@ -181,6 +186,21 @@ void paint_all_new(session_t *ps, struct managed_win *t, bool ignore_damage) {
region_t reg_shadow_clip;
pixman_region32_init(&reg_shadow_clip);
now = get_time_timespec();
auto after_damage_us = (uint64_t)now.tv_sec * 1000000UL + (uint64_t)now.tv_nsec / 1000;
log_trace("Getting damage took %" PRIu64 " us", after_damage_us - after_sync_fence_us);
if (ps->next_render > 0) {
log_trace("Render schedule deviation: %ld us (%s) %" PRIu64 " %ld",
labs((long)after_damage_us - (long)ps->next_render),
after_damage_us < ps->next_render ? "early" : "late",
after_damage_us, ps->next_render);
ps->last_schedule_delay = 0;
if (after_damage_us > ps->next_render) {
ps->last_schedule_delay = after_damage_us - ps->next_render;
}
}
ps->did_render = true;
if (ps->backend_data->ops->prepare) {
ps->backend_data->ops->prepare(ps->backend_data, &reg_paint);
}

View File

@@ -292,6 +292,14 @@ struct backend_operations {
/// Optional
int (*buffer_age)(backend_t *backend_data);
/// Get the render time of the last frame. If the render is still in progress,
/// returns false. The time is returned in `ts`. Frames are delimited by the
/// present() calls. i.e. after a present() call, last_render_time() should start
/// reporting the time of the just presented frame.
///
/// Optional, if not available, the most conservative estimation will be used.
bool (*last_render_time)(backend_t *backend_data, struct timespec *ts);
/// The maximum number buffer_age might return.
int max_buffer_age;
@@ -363,5 +371,4 @@ struct backend_operations {
extern struct backend_operations *backend_list[];
void paint_all_new(session_t *ps, struct managed_win *const t, bool ignore_damage)
attr_nonnull(1);
void paint_all_new(session_t *ps, struct managed_win *const t) attr_nonnull(1);

View File

@@ -294,13 +294,14 @@ shadow_picture_err:
void *default_backend_render_shadow(backend_t *backend_data, int width, int height,
struct backend_shadow_context *sctx, struct color color) {
const conv *kernel = (void *)sctx;
xcb_pixmap_t shadow_pixel = solid_picture(backend_data->c, backend_data->root, true,
1, color.red, color.green, color.blue),
shadow = XCB_NONE;
xcb_render_picture_t shadow_pixel = solid_picture(
backend_data->c, backend_data->root, true, 1, color.red, color.green, color.blue);
xcb_pixmap_t shadow = XCB_NONE;
xcb_render_picture_t pict = XCB_NONE;
if (!build_shadow(backend_data->c, backend_data->root, color.alpha, width, height,
kernel, shadow_pixel, &shadow, &pict)) {
xcb_render_free_picture(backend_data->c, shadow_pixel);
return NULL;
}
@@ -308,6 +309,7 @@ void *default_backend_render_shadow(backend_t *backend_data, int width, int heig
void *ret = backend_data->ops->bind_pixmap(
backend_data, shadow, x_get_visual_info(backend_data->c, visual), true);
xcb_render_free_picture(backend_data->c, pict);
xcb_render_free_picture(backend_data->c, shadow_pixel);
return ret;
}

View File

@@ -15,8 +15,6 @@
/// Apply driver specified global workarounds. It's safe to call this multiple times.
void apply_driver_workarounds(struct session *ps, enum driver driver) {
if (driver & DRIVER_NVIDIA) {
// setenv("__GL_YIELD", "usleep", true);
setenv("__GL_MaxFramesAllowed", "1", true);
ps->o.xrender_sync_fence = true;
}
}

View File

@@ -17,6 +17,7 @@ struct dummy_image {
xcb_pixmap_t pixmap;
bool transparent;
int *refcount;
bool owned;
UT_hash_handle hh;
};
@@ -42,6 +43,9 @@ void dummy_deinit(struct backend_base *data) {
log_warn("Backend image for pixmap %#010x is not freed", img->pixmap);
HASH_DEL(dummy->images, img);
free(img->refcount);
if (img->owned) {
xcb_free_pixmap(data->c, img->pixmap);
}
free(img);
}
free(dummy);
@@ -82,7 +86,7 @@ bool dummy_blur(struct backend_base *backend_data attr_unused, double opacity at
}
void *dummy_bind_pixmap(struct backend_base *base, xcb_pixmap_t pixmap,
struct xvisual_info fmt, bool owned attr_unused) {
struct xvisual_info fmt, bool owned) {
auto dummy = (struct dummy_data *)base;
struct dummy_image *img = NULL;
HASH_FIND_INT(dummy->images, &pixmap, img);
@@ -96,6 +100,7 @@ void *dummy_bind_pixmap(struct backend_base *base, xcb_pixmap_t pixmap,
img->transparent = fmt.alpha_size != 0;
img->refcount = ccalloc(1, int);
*img->refcount = 1;
img->owned = owned;
HASH_ADD_INT(dummy->images, pixmap, img);
return (void *)img;
@@ -112,6 +117,9 @@ void dummy_release_image(backend_t *base, void *image) {
if (*img->refcount == 0) {
HASH_DEL(dummy->images, img);
free(img->refcount);
if (img->owned) {
xcb_free_pixmap(base->c, img->pixmap);
}
free(img);
}
}
@@ -162,7 +170,7 @@ void dummy_destroy_blur_context(struct backend_base *base attr_unused, void *ctx
}
void dummy_get_blur_size(void *ctx attr_unused, int *width, int *height) {
// These numbers are arbitrary, to make sure the reisze_region code path is
// These numbers are arbitrary, to make sure the resize_region code path is
// covered.
*width = 5;
*height = 5;

View File

@@ -42,6 +42,30 @@ static PFNEGLDESTROYIMAGEKHRPROC eglDestroyImageProc = NULL;
static PFNEGLGETPLATFORMDISPLAYPROC eglGetPlatformDisplayProc = NULL;
static PFNEGLCREATEPLATFORMWINDOWSURFACEPROC eglCreatePlatformWindowSurfaceProc = NULL;
/// Map an EGLint error code (as returned by eglGetError()) to the name of the
/// corresponding EGL error constant. Returns the static string "Unknown" for
/// any code not in the standard EGL error list.
const char *eglGetErrorString(EGLint error) {
	switch (error) {
	case EGL_SUCCESS: return "EGL_SUCCESS";
	case EGL_NOT_INITIALIZED: return "EGL_NOT_INITIALIZED";
	case EGL_BAD_ACCESS: return "EGL_BAD_ACCESS";
	case EGL_BAD_ALLOC: return "EGL_BAD_ALLOC";
	case EGL_BAD_ATTRIBUTE: return "EGL_BAD_ATTRIBUTE";
	case EGL_BAD_CONTEXT: return "EGL_BAD_CONTEXT";
	case EGL_BAD_CONFIG: return "EGL_BAD_CONFIG";
	case EGL_BAD_CURRENT_SURFACE: return "EGL_BAD_CURRENT_SURFACE";
	case EGL_BAD_DISPLAY: return "EGL_BAD_DISPLAY";
	case EGL_BAD_SURFACE: return "EGL_BAD_SURFACE";
	case EGL_BAD_MATCH: return "EGL_BAD_MATCH";
	case EGL_BAD_PARAMETER: return "EGL_BAD_PARAMETER";
	case EGL_BAD_NATIVE_PIXMAP: return "EGL_BAD_NATIVE_PIXMAP";
	case EGL_BAD_NATIVE_WINDOW: return "EGL_BAD_NATIVE_WINDOW";
	case EGL_CONTEXT_LOST: return "EGL_CONTEXT_LOST";
	default: return "Unknown";
	}
}
/**
* Free a glx_texture_t.
*/
@@ -283,7 +307,8 @@ egl_bind_pixmap(backend_t *base, xcb_pixmap_t pixmap, struct xvisual_info fmt, b
eglpixmap->owned = owned;
if (eglpixmap->image == EGL_NO_IMAGE) {
log_error("Failed to create eglpixmap for pixmap %#010x", pixmap);
log_error("Failed to create eglpixmap for pixmap %#010x: %s", pixmap,
eglGetErrorString(eglGetError()));
goto err;
}
@@ -320,9 +345,6 @@ static void egl_present(backend_t *base, const region_t *region attr_unused) {
struct egl_data *gd = (void *)base;
gl_present(base, region);
eglSwapBuffers(gd->display, gd->target_win);
if (!gd->gl.is_nvidia) {
glFinish();
}
}
static int egl_buffer_age(backend_t *base) {
@@ -372,6 +394,7 @@ struct backend_operations egl_ops = {
.deinit = egl_deinit,
.bind_pixmap = egl_bind_pixmap,
.release_image = gl_release_image,
.prepare = gl_prepare,
.compose = gl_compose,
.image_op = gl_image_op,
.set_image_property = gl_set_image_property,
@@ -380,6 +403,7 @@ struct backend_operations egl_ops = {
.is_image_transparent = default_is_image_transparent,
.present = egl_present,
.buffer_age = egl_buffer_age,
.last_render_time = gl_last_render_time,
.create_shadow_context = gl_create_shadow_context,
.destroy_shadow_context = gl_destroy_shadow_context,
.render_shadow = backend_render_shadow_from_mask,

View File

@@ -22,6 +22,11 @@
#include "backend/backend_common.h"
#include "backend/gl/gl_common.h"
/// Mark the start of a frame's GPU work: begin the GL_TIME_ELAPSED query for
/// the current slot of the two-entry query ring (gd->frame_timing). The query
/// is ended in gl_present() and its result is read back by
/// gl_last_render_time(). The damage region `reg` is unused here.
void gl_prepare(backend_t *base, const region_t *reg attr_unused) {
auto gd = (struct gl_data *)base;
glBeginQuery(GL_TIME_ELAPSED, gd->frame_timing[gd->current_frame_timing]);
}
GLuint gl_create_shader(GLenum shader_type, const char *shader_str) {
log_trace("===\n%s\n===", shader_str);
@@ -800,6 +805,9 @@ uint64_t gl_get_shader_attributes(backend_t *backend_data attr_unused, void *sha
}
bool gl_init(struct gl_data *gd, session_t *ps) {
glGenQueries(2, gd->frame_timing);
gd->current_frame_timing = 0;
// Initialize GLX data structure
glDisable(GL_DEPTH_TEST);
glDepthMask(GL_FALSE);
@@ -945,7 +953,7 @@ bool gl_init(struct gl_data *gd, session_t *ps) {
const char *vendor = (const char *)glGetString(GL_VENDOR);
log_debug("GL_VENDOR = %s", vendor);
if (strcmp(vendor, "NVIDIA Corporation") == 0) {
log_info("GL vendor is NVIDIA, don't use glFinish");
log_info("GL vendor is NVIDIA, enable xrender sync fence.");
gd->is_nvidia = true;
} else {
gd->is_nvidia = false;
@@ -968,6 +976,9 @@ void gl_deinit(struct gl_data *gd) {
gd->default_shader = NULL;
}
glDeleteTextures(1, &gd->default_mask_texture);
glDeleteTextures(1, &gd->back_texture);
gl_check_err();
}
@@ -1154,10 +1165,33 @@ void gl_present(backend_t *base, const region_t *region) {
glDeleteBuffers(2, bo);
glDeleteVertexArrays(1, &vao);
glEndQuery(GL_TIME_ELAPSED);
gd->current_frame_timing ^= 1;
gl_check_err();
free(coord);
free(indices);
}
/// Fetch the GPU render time of the previously presented frame.
///
/// Reads the GL_TIME_ELAPSED query from the *other* slot of the two-entry
/// query ring (current_frame_timing ^ 1), i.e. the query that was ended by the
/// last gl_present() call. Returns false without touching `ts` if the query
/// result is not yet available (render still in progress on the GPU);
/// otherwise stores the elapsed time (nanosecond query result split into
/// seconds/nanoseconds) into `ts` and returns true.
bool gl_last_render_time(backend_t *base, struct timespec *ts) {
auto gd = (struct gl_data *)base;
GLint available = 0;
glGetQueryObjectiv(gd->frame_timing[gd->current_frame_timing ^ 1],
GL_QUERY_RESULT_AVAILABLE, &available);
if (!available) {
return false;
}
// Result is ready; fetch it in nanoseconds.
GLuint64 time;
glGetQueryObjectui64v(gd->frame_timing[gd->current_frame_timing ^ 1],
GL_QUERY_RESULT, &time);
ts->tv_sec = (long)(time / 1000000000);
ts->tv_nsec = (long)(time % 1000000000);
gl_check_err();
return true;
}
bool gl_image_op(backend_t *base, enum image_operations op, void *image_data,
const region_t *reg_op, const region_t *reg_visible attr_unused, void *arg) {
struct backend_image *tex = image_data;

View File

@@ -108,6 +108,8 @@ struct gl_data {
gl_shadow_shader_t shadow_shader;
GLuint back_texture, back_fbo;
GLint back_format;
GLuint frame_timing[2];
int current_frame_timing;
GLuint present_prog;
bool dithered_present;
@@ -129,6 +131,7 @@ typedef struct session session_t;
#define GL_PROG_MAIN_INIT \
{ .prog = 0, .unifm_opacity = -1, .unifm_invert_color = -1, .unifm_tex = -1, }
void gl_prepare(backend_t *base, const region_t *reg);
void x_rect_to_coords(int nrects, const rect_t *rects, coord_t image_dst,
int extent_height, int texture_height, int root_height,
bool y_inverted, GLint *coord, GLuint *indices);
@@ -142,6 +145,7 @@ void gl_destroy_window_shader(backend_t *backend_data, void *shader);
uint64_t gl_get_shader_attributes(backend_t *backend_data, void *shader);
bool gl_set_image_property(backend_t *backend_data, enum image_properties prop,
void *image_data, void *args);
bool gl_last_render_time(backend_t *backend_data, struct timespec *time);
/**
* @brief Render a region with texture data.
@@ -184,10 +188,6 @@ void gl_present(backend_t *base, const region_t *);
bool gl_read_pixel(backend_t *base, void *image_data, int x, int y, struct color *output);
enum device_status gl_device_status(backend_t *base);
static inline void gl_delete_texture(GLuint texture) {
glDeleteTextures(1, &texture);
}
/**
* Get a textual representation of an OpenGL error.
*/

View File

@@ -468,9 +468,6 @@ static void glx_present(backend_t *base, const region_t *region attr_unused) {
struct _glx_data *gd = (void *)base;
gl_present(base, region);
glXSwapBuffers(gd->display, gd->target_win);
if (!gd->gl.is_nvidia) {
glFinish();
}
}
static int glx_buffer_age(backend_t *base) {
@@ -528,6 +525,7 @@ struct backend_operations glx_ops = {
.deinit = glx_deinit,
.bind_pixmap = glx_bind_pixmap,
.release_image = gl_release_image,
.prepare = gl_prepare,
.compose = gl_compose,
.image_op = gl_image_op,
.set_image_property = gl_set_image_property,
@@ -536,6 +534,7 @@ struct backend_operations glx_ops = {
.is_image_transparent = default_is_image_transparent,
.present = glx_present,
.buffer_age = glx_buffer_age,
.last_render_time = gl_last_render_time,
.create_shadow_context = gl_create_shadow_context,
.destroy_shadow_context = gl_destroy_shadow_context,
.render_shadow = backend_render_shadow_from_mask,

View File

@@ -202,7 +202,8 @@ const char dither_glsl[] = GLSL(330,
}
vec4 dither(vec4 c, vec2 coord) {
vec4 residual = mod(c, 1.0 / 255.0);
vec4 dithered = vec4(greaterThan(residual, vec4(1e-4)));
residual = min(residual, vec4(1.0 / 255.0) - residual);
vec4 dithered = vec4(greaterThan(residual, vec4(1.0 / 65535.0)));
return vec4(c + dithered * bayer(coord) / 255.0);
}
);

View File

@@ -520,8 +520,9 @@ bind_pixmap(backend_t *base, xcb_pixmap_t pixmap, struct xvisual_info fmt, bool
inner->height = img->base.eheight = r->height;
inner->pixmap = pixmap;
inner->has_alpha = fmt.alpha_size != 0;
inner->pict =
x_create_picture_with_visual_and_pixmap(base->c, fmt.visual, pixmap, 0, NULL);
xcb_render_create_picture_value_list_t pic_attrs = {.repeat = XCB_RENDER_REPEAT_NORMAL};
inner->pict = x_create_picture_with_visual_and_pixmap(
base->c, fmt.visual, pixmap, XCB_RENDER_CP_REPEAT, &pic_attrs);
inner->owned = owned;
inner->visual = fmt.visual;
inner->refcount = 1;

View File

@@ -58,6 +58,7 @@
#include "list.h"
#include "region.h"
#include "render.h"
#include "statistics.h"
#include "types.h"
#include "utils.h"
#include "win_defs.h"
@@ -156,9 +157,8 @@ typedef struct session {
ev_timer unredir_timer;
/// Timer for fading
ev_timer fade_timer;
/// Use an ev_idle callback for drawing
/// So we only start drawing when events are processed
ev_idle draw_idle;
/// Use an ev_timer callback for drawing
ev_timer draw_timer;
/// Called every time we have timeouts or new data on socket,
/// so we can be sure if xcb read from X socket at anytime during event
/// handling, we will not left any event unhandled in the queue
@@ -240,6 +240,29 @@ typedef struct session {
bool first_frame;
/// Whether screen has been turned off
bool screen_is_off;
/// Event context for X Present extension.
uint32_t present_event_id;
xcb_special_event_t *present_event;
/// When last MSC event happened, in useconds.
uint64_t last_msc_instant;
/// The last MSC number
uint64_t last_msc;
/// When the currently rendered frame will be displayed.
/// 0 means there is no pending frame.
uint64_t target_msc;
/// The delay between when the last frame was scheduled to be rendered, and when
/// the render actually started.
uint64_t last_schedule_delay;
/// When do we want our next frame to start rendering.
uint64_t next_render;
/// Did we actually render the last frame. Sometimes redraw will be scheduled only
/// to find out nothing has changed. In which case this will be set to false.
bool did_render;
/// Whether we can perform frame pacing.
bool frame_pacing;
/// Render statistics
struct render_statistics render_stats;
// === Operation related ===
/// Flags related to the root window

View File

@@ -509,32 +509,37 @@ parse_geometry_end:
}
/**
* Parse a list of opacity rules.
* Parse a list of window rules, prefixed with a number, separated by a ':'
*/
bool parse_rule_opacity(c2_lptr_t **res, const char *src) {
// Find opacity value
bool parse_numeric_window_rule(c2_lptr_t **res, const char *src, long min, long max) {
if (!src) {
return false;
}
// Find numeric value
char *endptr = NULL;
long val = strtol(src, &endptr, 0);
if (!endptr || endptr == src) {
log_error("No opacity specified: %s", src);
log_error("No number specified: %s", src);
return false;
}
if (val > 100 || val < 0) {
log_error("Opacity %ld invalid: %s", val, src);
if (val < min || val > max) {
log_error("Number not in range (%ld <= n <= %ld): %s", min, max, src);
return false;
}
// Skip over spaces
while (*endptr && isspace((unsigned char)*endptr))
while (*endptr && isspace((unsigned char)*endptr)) {
++endptr;
}
if (':' != *endptr) {
log_error("Opacity terminator not found: %s", src);
log_error("Number separator (':') not found: %s", src);
return false;
}
++endptr;
// Parse pattern
// I hope 1-100 is acceptable for (void *)
return c2_parse(res, endptr, (void *)val);
}
@@ -737,6 +742,7 @@ char *parse_config(options_t *opt, const char *config_file, bool *shadow_enable,
.logpath = NULL,
.use_damage = true,
.no_frame_pacing = false,
.shadow_red = 0.0,
.shadow_green = 0.0,

View File

@@ -140,6 +140,8 @@ typedef struct options {
bool vsync_use_glfinish;
/// Whether use damage information to help limit the area to paint
bool use_damage;
/// Disable frame pacing
bool no_frame_pacing;
// === Shadow ===
/// Red, green and blue tone of the shadow.
@@ -228,6 +230,8 @@ typedef struct options {
int corner_radius;
/// Rounded corners blacklist. A linked list of conditions.
c2_lptr_t *rounded_corners_blacklist;
/// Rounded corner rules. A linked list of conditions.
c2_lptr_t *corner_radius_rules;
// === Focus related ===
/// Whether to try to detect WM windows and mark them as focused.
@@ -266,7 +270,7 @@ bool must_use parse_long(const char *, long *);
bool must_use parse_int(const char *, int *);
struct conv **must_use parse_blur_kern_lst(const char *, bool *hasneg, int *count);
bool must_use parse_geometry(session_t *, const char *, region_t *);
bool must_use parse_rule_opacity(c2_lptr_t **, const char *);
bool must_use parse_numeric_window_rule(c2_lptr_t **, const char *, long, long);
bool must_use parse_rule_window_shader(c2_lptr_t **, const char *, const char *);
char *must_use locate_auxiliary_file(const char *scope, const char *path,
const char *include_dir);

View File

@@ -135,6 +135,32 @@ void parse_cfg_condlst(const config_t *pcfg, c2_lptr_t **pcondlst, const char *n
}
}
/**
* Parse a window corner radius rule list in configuration file.
*/
/**
 * Parse a window corner radius rule list from the configuration file.
 *
 * The setting `name` may be either an array of "RADIUS:COND" strings or a
 * single such string; each rule is added to opt->corner_radius_rules with the
 * radius clamped to [0, INT_MAX]. Exits the process on a malformed rule.
 */
static inline void
parse_cfg_condlst_corner(options_t *opt, const config_t *pcfg, const char *name) {
	config_setting_t *setting = config_lookup(pcfg, name);
	if (!setting) {
		return;
	}
	if (config_setting_is_array(setting)) {
		// An array of rules; iterate back to front.
		for (int i = config_setting_length(setting); i-- > 0;) {
			const char *rule = config_setting_get_string_elem(setting, i);
			if (!parse_numeric_window_rule(&opt->corner_radius_rules,
			                               rule, 0, INT_MAX)) {
				exit(1);
			}
		}
	} else if (config_setting_type(setting) == CONFIG_TYPE_STRING) {
		// A bare string is treated as a single rule.
		if (!parse_numeric_window_rule(&opt->corner_radius_rules,
		                               config_setting_get_string(setting),
		                               0, INT_MAX)) {
			exit(1);
		}
	}
}
/**
* Parse an opacity rule list in configuration file.
*/
@@ -146,15 +172,15 @@ parse_cfg_condlst_opct(options_t *opt, const config_t *pcfg, const char *name) {
if (config_setting_is_array(setting)) {
int i = config_setting_length(setting);
while (i--)
if (!parse_rule_opacity(
if (!parse_numeric_window_rule(
&opt->opacity_rules,
config_setting_get_string_elem(setting, i)))
config_setting_get_string_elem(setting, i), 0, 100))
exit(1);
}
// Treat it as a single pattern if it's a string
else if (config_setting_type(setting) == CONFIG_TYPE_STRING) {
if (!parse_rule_opacity(&opt->opacity_rules,
config_setting_get_string(setting)))
if (!parse_numeric_window_rule(
&opt->opacity_rules, config_setting_get_string(setting), 0, 100))
exit(1);
}
}
@@ -334,6 +360,12 @@ char *parse_config_libconfig(options_t *opt, const char *config_file, bool *shad
config_lookup_int(&cfg, "corner-radius", &opt->corner_radius);
// --rounded-corners-exclude
parse_cfg_condlst(&cfg, &opt->rounded_corners_blacklist, "rounded-corners-exclude");
// --corner-radius-rules
parse_cfg_condlst_corner(opt, &cfg, "corner-radius-rules");
// --no-frame-pacing
lcfg_lookup_bool(&cfg, "no-frame-pacing", &opt->no_frame_pacing);
// -e (frame_opacity)
config_lookup_float(&cfg, "frame-opacity", &opt->frame_opacity);
// -c (shadow_enable)

View File

@@ -9,7 +9,7 @@ base_deps = [
srcs = [ files('picom.c', 'win.c', 'c2.c', 'x.c', 'config.c', 'vsync.c', 'utils.c',
'diagnostic.c', 'string_utils.c', 'render.c', 'kernel.c', 'log.c',
'options.c', 'event.c', 'cache.c', 'atom.c', 'file_watch.c') ]
'options.c', 'event.c', 'cache.c', 'atom.c', 'file_watch.c', 'statistics.c') ]
picom_inc = include_directories('.')
cflags = []

View File

@@ -161,6 +161,7 @@ static const struct picom_option picom_options[] = {
{"corner-radius" , required_argument, 333, NULL , "Sets the radius of rounded window corners. When > 0, the compositor will "
"round the corners of windows. (defaults to 0)."},
{"rounded-corners-exclude" , required_argument, 334, "COND" , "Exclude conditions for rounded corners."},
{"corner-radius-rules" , required_argument, 340, "RADIUS:COND" , "Window rules for specific rounded corner radii."},
{"clip-shadow-above" , required_argument, 335, NULL , "Specify a list of conditions of windows to not paint a shadow over, such "
"as a dock window."},
{"window-shader-fg" , required_argument, 336, "PATH" , "Specify GLSL fragment shader path for rendering window contents. Does not"
@@ -174,6 +175,8 @@ static const struct picom_option picom_options[] = {
{"dithered-present" , no_argument , 339, NULL , "Use higher precision during rendering, and apply dither when presenting the "
"rendered screen. Reduces banding artifacts, but might cause performance "
"degradation. Only works with OpenGL."},
// 340 is corner-radius-rules
{"no-frame-pacing" , no_argument , 341, NULL , "Disable frame pacing. This might increase the latency."},
{"legacy-backends" , no_argument , 733, NULL , "Use deprecated version of the backends."},
{"monitor-repaint" , no_argument , 800, NULL , "Highlight the updated area of the screen. For debugging."},
{"diagnostics" , no_argument , 801, NULL , "Print diagnostic information"},
@@ -619,7 +622,7 @@ bool get_cfg(options_t *opt, int argc, char *const *argv, bool shadow_enable,
break;
case 304:
// --opacity-rule
if (!parse_rule_opacity(&opt->opacity_rules, optarg))
if (!parse_numeric_window_rule(&opt->opacity_rules, optarg, 0, 100))
exit(1);
break;
case 305:
@@ -723,6 +726,11 @@ bool get_cfg(options_t *opt, int argc, char *const *argv, bool shadow_enable,
// --rounded-corners-exclude
condlst_add(&opt->rounded_corners_blacklist, optarg);
break;
case 340:
// --corner-radius-rules
if (!parse_numeric_window_rule(&opt->corner_radius_rules, optarg, 0, INT_MAX))
exit(1);
break;
case 335:
// --clip-shadow-above
condlst_add(&opt->shadow_clip_list, optarg);
@@ -731,6 +739,7 @@ bool get_cfg(options_t *opt, int argc, char *const *argv, bool shadow_enable,
// --dithered-present
opt->dithered_present = true;
break;
P_CASEBOOL(341, no_frame_pacing);
P_CASEBOOL(733, legacy_backends);
P_CASEBOOL(800, monitor_repaint);
case 801:
@@ -794,14 +803,13 @@ bool get_cfg(options_t *opt, int argc, char *const *argv, bool shadow_enable,
}
if (opt->window_shader_fg || opt->window_shader_fg_rules) {
if (opt->legacy_backends || opt->backend != BKEND_GLX) {
log_warn("The new window shader interface does not work with the "
"legacy glx backend.%s",
(opt->backend == BKEND_GLX) ? " You may want to use "
"\"--glx-fshader-win\" "
"instead on the legacy "
"glx backend."
: "");
if (opt->backend == BKEND_XRENDER || opt->legacy_backends) {
log_warn(opt->backend == BKEND_XRENDER
? "Shader interface is not supported by the xrender "
"backend."
: "The new shader interface is not supported by the "
"legacy glx backend. You may want to use "
"--glx-fshader-win instead.");
opt->window_shader_fg = NULL;
c2_list_free(&opt->window_shader_fg_rules, free);
}
@@ -816,18 +824,16 @@ bool get_cfg(options_t *opt, int argc, char *const *argv, bool shadow_enable,
opt->inactive_dim = normalize_d(opt->inactive_dim);
opt->frame_opacity = normalize_d(opt->frame_opacity);
opt->shadow_opacity = normalize_d(opt->shadow_opacity);
opt->max_brightness = normalize_d(opt->max_brightness);
if (opt->max_brightness < 1.0) {
if (opt->use_damage) {
log_warn("--max-brightness requires --no-use-damage. Falling "
"back to 1.0");
if (opt->backend == BKEND_XRENDER || opt->legacy_backends) {
log_warn("--max-brightness is not supported by the %s backend. "
"Falling back to 1.0.",
opt->backend == BKEND_XRENDER ? "xrender" : "legacy glx");
opt->max_brightness = 1.0;
}
if (opt->legacy_backends || opt->backend != BKEND_GLX) {
log_warn("--max-brightness requires the new glx "
"backend. Falling back to 1.0");
} else if (opt->use_damage) {
log_warn("--max-brightness requires --no-use-damage. Falling "
"back to 1.0.");
opt->max_brightness = 1.0;
}
}

View File

@@ -16,8 +16,10 @@
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <sys/resource.h>
#include <unistd.h>
#include <xcb/composite.h>
#include <xcb/damage.h>
@@ -43,7 +45,6 @@
#endif
#include "backend/backend.h"
#include "c2.h"
#include "config.h"
#include "diagnostic.h"
#include "log.h"
#include "region.h"
@@ -60,6 +61,7 @@
#include "file_watch.h"
#include "list.h"
#include "options.h"
#include "statistics.h"
#include "uthash_extra.h"
/// Get session_t pointer from a pointer to a member of session_t
@@ -133,6 +135,7 @@ void check_dpms_status(EV_P attr_unused, ev_timer *w, int revents attr_unused) {
}
auto now_screen_is_off = dpms_screen_is_off(r);
if (ps->screen_is_off != now_screen_is_off) {
log_debug("Screen is now %s", now_screen_is_off ? "off" : "on");
ps->screen_is_off = now_screen_is_off;
queue_redraw(ps);
}
@@ -145,21 +148,173 @@ void check_dpms_status(EV_P attr_unused, ev_timer *w, int revents attr_unused) {
* XXX move to win.c
*/
static inline struct managed_win *find_win_all(session_t *ps, const xcb_window_t wid) {
if (!wid || PointerRoot == wid || wid == ps->root || wid == ps->overlay)
if (!wid || PointerRoot == wid || wid == ps->root || wid == ps->overlay) {
return NULL;
}
auto w = find_managed_win(ps, wid);
if (!w)
if (!w) {
w = find_toplevel(ps, wid);
if (!w)
}
if (!w) {
w = find_managed_window_or_parent(ps, wid);
}
return w;
}
/// How many seconds into the future should we start rendering the next frame.
///
/// Renders are scheduled like this:
///
/// 1. queue_redraw() registers the intention to render. redraw_needed is set to true to
/// indicate what is on screen needs to be updated.
/// 2. then, we need to figure out the best time to start rendering. first, we need to
/// know when the next frame will be displayed on screen. we have this information from
/// the Present extension: we know when was the last frame displayed, and we know the
/// refresh rate. so we can calculate the next frame's display time. if our render time
/// estimation shows we could miss that target, we push the target back one frame.
/// 3. if there is already render completed for that target frame, or there is a render
/// currently underway, we don't do anything, and wait for the next Present Complete
/// Notify event to try to schedule again.
/// 4. otherwise, we schedule a render for that target frame. we use past statistics about
/// how long our renders took to figure out when to start rendering. we start rendering
/// at the latest point of time possible to still hit the target frame.
///
/// The `triggered_by_vblank` parameter is used to indicate whether this function is
/// triggered by a steady timer, i.e. we are rendering for each vblank. The other case is
/// when we stop rendering for a while because there is no changes on screen, then
/// something changed and schedule_render is triggered by a DamageNotify. The idea is that
/// when the schedule is triggered by a steady timer, schedule_render will be called at a
/// predictable offset into each vblank.
void schedule_render(session_t *ps, bool triggered_by_vblank) {
double delay_s = 0;
// Cleared here; only set again below when we actually compute a pacing target.
ps->next_render = 0;
if (!ps->frame_pacing || !ps->redirected) {
// Not doing frame pacing, schedule a render immediately, if not already
// scheduled.
// If not redirected, we schedule immediately to have a chance to
// redirect. We won't have frame or render timing information anyway.
if (!ev_is_active(&ps->draw_timer)) {
// We don't know the msc, so we set it to 1, because 0 is a
// special value
ps->target_msc = 1;
goto schedule;
}
return;
}
// Frame pacing path: ask the backend how long the last render took. A false
// return means the GPU is still working on it.
struct timespec render_time;
bool completed =
ps->backend_data->ops->last_render_time(ps->backend_data, &render_time);
if (!completed || ev_is_active(&ps->draw_timer)) {
// There is already a render underway (either just scheduled, or is
// rendered but awaiting completion), don't schedule another one.
if (ps->target_msc <= ps->last_msc) {
log_debug("Target frame %ld is in the past, but we are still "
"rendering",
ps->target_msc);
// We missed our target, push it back one frame
ps->target_msc = ps->last_msc + 1;
}
log_trace("Still rendering for target frame %ld, not scheduling another "
"render",
ps->target_msc);
return;
}
if (ps->target_msc > ps->last_msc) {
// Render for the target frame is completed, but is yet to be displayed.
// Don't schedule another render.
log_trace("Target frame %ld is in the future, and we have already "
"rendered, last msc: %d",
ps->target_msc, (int)ps->last_msc);
return;
}
struct timespec now;
clock_gettime(CLOCK_MONOTONIC, &now);
auto now_us = (uint64_t)now.tv_sec * 1000000 + (uint64_t)now.tv_nsec / 1000;
if (triggered_by_vblank) {
log_trace("vblank schedule delay: %ld us", now_us - ps->last_msc_instant);
}
int render_time_us =
(int)(render_time.tv_sec * 1000000L + render_time.tv_nsec / 1000L);
if (ps->target_msc == ps->last_msc) {
// The frame has just been displayed, record its render time;
if (ps->did_render) {
log_trace("Last render call took: %d (gpu) + %d (cpu) us, "
"last_msc: %" PRIu64,
render_time_us, (int)ps->last_schedule_delay, ps->last_msc);
render_statistics_add_render_time_sample(
&ps->render_stats, render_time_us + (int)ps->last_schedule_delay);
}
// Reset per-frame bookkeeping now that the sample has been recorded.
ps->target_msc = 0;
ps->did_render = false;
ps->last_schedule_delay = 0;
}
// `divisor` is how many vblanks ahead the statistics module says we need to
// aim; `render_budget` is the estimated render duration in microseconds.
unsigned int divisor = 0;
auto render_budget = render_statistics_get_budget(&ps->render_stats, &divisor);
auto frame_time = render_statistics_get_vblank_time(&ps->render_stats);
if (frame_time == 0) {
// We don't have enough data for render time estimates, maybe there's
// no frame rendered yet, or the backend doesn't support render timing
// information, schedule render immediately.
ps->target_msc = ps->last_msc + 1;
goto schedule;
}
// Absolute time (us) at which the target vblank is expected to occur.
const auto deadline = ps->last_msc_instant + (unsigned long)divisor * frame_time;
unsigned int available = 0;
if (deadline > now_us) {
available = (unsigned int)(deadline - now_us);
}
ps->target_msc = ps->last_msc + divisor;
if (available > render_budget) {
// Start as late as possible while still expecting to hit the deadline.
delay_s = (double)(available - render_budget) / 1000000.0;
ps->next_render = deadline - render_budget;
} else {
// Not enough time left in this vblank; start immediately.
delay_s = 0;
ps->next_render = now_us;
}
if (delay_s > 1) {
log_warn("Delay too long: %f s, render_budget: %d us, frame_time: "
"%" PRIu32 " us, now_us: %" PRIu64 " us, next_msc: %" PRIu64 " u"
"s",
delay_s, render_budget, frame_time, now_us, deadline);
}
log_trace("Delay: %.6lf s, last_msc: %" PRIu64 ", render_budget: %d, frame_time: "
"%" PRIu32 ", now_us: %" PRIu64 ", next_msc: %" PRIu64 ", "
"target_msc: %" PRIu64 ", divisor: %d",
delay_s, ps->last_msc_instant, render_budget, frame_time, now_us,
deadline, ps->target_msc, divisor);
schedule:
assert(!ev_is_active(&ps->draw_timer));
ev_timer_set(&ps->draw_timer, delay_s, 0);
ev_timer_start(ps->loop, &ps->draw_timer);
}
void queue_redraw(session_t *ps) {
	// Ask for a redraw. Rendering is deferred to the draw_timer callback,
	// scheduled by schedule_render based on frame pacing state.
	if (ps->screen_is_off) {
		// The screen is off, if there is a draw queued for the next frame (i.e.
		// ps->redraw_needed == true), it won't be triggered until the screen is
		// on again, because of the abnormal Present events we will receive from
		// the X server when the screen is off. Yet we need the draw_callback to
		// be called as soon as possible so the screen can be unredirected.
		// So here we unconditionally start the draw timer.
		ev_timer_stop(ps->loop, &ps->draw_timer);
		ev_timer_set(&ps->draw_timer, 0, 0);
		ev_timer_start(ps->loop, &ps->draw_timer);
		return;
	}
	// Only schedule a render if one hasn't been scheduled for this frame yet.
	// With --benchmark, redraws are driven non-stop by draw_callback instead.
	if (!ps->redraw_needed && !ps->o.benchmark) {
		// NOTE(review): the pre-merge code additionally called
		// ev_idle_start(ps->loop, &ps->draw_idle) here; this merge replaces
		// draw_idle with draw_timer (see session_init/session_destroy), so
		// only schedule_render must remain — the stale idle kick is dropped.
		schedule_render(ps, false);
	}
	ps->redraw_needed = true;
}
@@ -635,7 +790,6 @@ static void configure_root(session_t *ps) {
}
force_repaint(ps);
}
return;
}
static void handle_root_flags(session_t *ps) {
@@ -1291,6 +1445,39 @@ static bool redirect_start(session_t *ps) {
pixman_region32_init(&ps->damage_ring[i]);
}
ps->frame_pacing = !ps->o.no_frame_pacing;
if ((ps->o.legacy_backends || ps->o.benchmark || !ps->backend_data->ops->last_render_time) &&
ps->frame_pacing) {
// Disable frame pacing if we are using a legacy backend or if we are in
// benchmark mode, or if the backend doesn't report render time
log_info("Disabling frame pacing.");
ps->frame_pacing = false;
}
if (ps->present_exists && ps->frame_pacing) {
ps->present_event_id = x_new_id(ps->c);
auto select_input = xcb_present_select_input(
ps->c, ps->present_event_id, session_get_target_window(ps),
XCB_PRESENT_EVENT_MASK_COMPLETE_NOTIFY);
auto notify_msc =
xcb_present_notify_msc(ps->c, session_get_target_window(ps), 0, 0, 1, 0);
set_cant_fail_cookie(ps, select_input);
set_cant_fail_cookie(ps, notify_msc);
ps->present_event = xcb_register_for_special_xge(
ps->c, &xcb_present_id, ps->present_event_id, NULL);
// Initialize rendering and frame timing statistics, and frame pacing
// states.
ps->last_msc_instant = 0;
ps->last_msc = 0;
ps->last_schedule_delay = 0;
ps->target_msc = 0;
render_statistics_reset(&ps->render_stats);
} else if (ps->frame_pacing) {
log_error("Present extension is not supported, frame pacing disabled.");
ps->frame_pacing = false;
}
// Must call XSync() here
x_sync(ps->c);
@@ -1332,6 +1519,14 @@ static void unredirect(session_t *ps) {
free(ps->damage_ring);
ps->damage_ring = ps->damage = NULL;
if (ps->present_event_id) {
xcb_present_select_input(ps->c, ps->present_event_id,
session_get_target_window(ps), 0);
ps->present_event_id = XCB_NONE;
xcb_unregister_for_special_event(ps->c, ps->present_event);
ps->present_event = NULL;
}
// Must call XSync() here
x_sync(ps->c);
@@ -1339,9 +1534,93 @@ static void unredirect(session_t *ps) {
log_debug("Screen unredirected.");
}
static void
handle_present_complete_notify(session_t *ps, xcb_present_complete_notify_event_t *cne) {
	// Handle a Present CompleteNotify event. Only the NOTIFY_MSC kind is
	// used: it reports that a vblank (msc) passed, and at what time (ust).
	if (cne->kind != XCB_PRESENT_COMPLETE_KIND_NOTIFY_MSC) {
		return;
	}

	bool event_is_invalid = false;
	if (ps->frame_pacing) {
		auto next_msc = cne->msc + 1;
		if (cne->msc <= ps->last_msc || cne->ust == 0) {
			// X sometimes sends duplicate/bogus MSC events, don't
			// use the msc value. Also ignore these events.
			//
			// See:
			// https://gitlab.freedesktop.org/xorg/xserver/-/issues/1418
			next_msc = ps->last_msc + 1;
			event_is_invalid = true;
		}
		// Re-arm notification for the next vblank so timing info keeps
		// flowing.
		auto cookie = xcb_present_notify_msc(ps->c, session_get_target_window(ps),
		                                     0, next_msc, 0, 0);
		set_cant_fail_cookie(ps, cookie);
	}
	if (event_is_invalid) {
		return;
	}

	struct timespec now;
	clock_gettime(CLOCK_MONOTONIC, &now);
	// Cast tv_sec before multiplying: doing the multiply in time_t overflows
	// on platforms with a 32-bit time_t. (Matches the paint_all_new idiom.)
	uint64_t now_usec = (uint64_t)now.tv_sec * 1000000UL + (uint64_t)now.tv_nsec / 1000;
	// Absolute difference between the event timestamp and our monotonic clock.
	uint64_t drift;
	if (cne->ust > now_usec) {
		drift = cne->ust - now_usec;
	} else {
		drift = now_usec - cne->ust;
	}

	if (ps->last_msc_instant != 0) {
		auto frame_count = cne->msc - ps->last_msc;
		int frame_time = (int)((cne->ust - ps->last_msc_instant) / frame_count);
		render_statistics_add_vblank_time_sample(&ps->render_stats, frame_time);
		// frame_count is uint64_t, so use PRIu64 (not %lu); the fourth value
		// logged is the ust timestamp, label it accordingly.
		log_trace("Frame count %" PRIu64 ", frame time: %d us, rolling "
		          "average: %u us, ust: %" PRIu64 ", offset: %d us",
		          frame_count, frame_time,
		          render_statistics_get_vblank_time(&ps->render_stats), cne->ust,
		          (int)drift);
	} else if (drift > 1000000 && ps->frame_pacing) {
		// This is the first MSC event we receive, let's check if the timestamps
		// align with the monotonic clock. If not, disable frame pacing because we
		// can't schedule frames reliably.
		log_error("Temporal anomaly detected, frame pacing disabled. (Are we "
		          "running inside a time namespace?), %" PRIu64 " %" PRIu64,
		          now_usec, ps->last_msc_instant);
		ps->frame_pacing = false;
	}
	ps->last_msc_instant = cne->ust;
	ps->last_msc = cne->msc;
	if (ps->redraw_needed) {
		schedule_render(ps, true);
	}
}
static void handle_present_events(session_t *ps) {
	// Drain all pending Present events from our special event queue and
	// dispatch the ones addressed to our event context.
	if (!ps->present_event) {
		// Screen not redirected
		return;
	}
	for (xcb_present_generic_event_t *ev;
	     (ev = (void *)xcb_poll_for_special_event(ps->c, ps->present_event)) != NULL;) {
		if (ev->event == ps->present_event_id) {
			// We only subscribed to the complete notify event.
			assert(ev->evtype == XCB_PRESENT_EVENT_COMPLETE_NOTIFY);
			handle_present_complete_notify(ps, (void *)ev);
		}
		// Events with a different event context are not meant for us;
		// either way the event must be freed.
		free(ev);
	}
}
// Handle queued events before we go to sleep
static void handle_queued_x_events(EV_P attr_unused, ev_prepare *w, int revents attr_unused) {
session_t *ps = session_ptr(w, event_check);
handle_present_events(ps);
xcb_generic_event_t *ev;
while ((ev = xcb_poll_for_queued_event(ps->c))) {
ev_handle(ps, ev);
@@ -1403,6 +1682,8 @@ static void tmout_unredir_callback(EV_P attr_unused, ev_timer *w, int revents at
}
static void fade_timer_callback(EV_P attr_unused, ev_timer *w, int revents attr_unused) {
	// Periodic tick that drives fading by queuing a redraw each interval.
	// TODO(yshui): do we still need the fade timer? we queue redraw automatically in
	// draw_callback_impl if animation is running.
	session_t *ps = session_ptr(w, fade_timer);
	queue_redraw(ps);
}
@@ -1460,8 +1741,24 @@ static void handle_pending_updates(EV_P_ struct session *ps) {
}
static void draw_callback_impl(EV_P_ session_t *ps, int revents attr_unused) {
struct timespec now;
int64_t draw_callback_enter_us;
clock_gettime(CLOCK_MONOTONIC, &now);
draw_callback_enter_us = (now.tv_sec * 1000000LL + now.tv_nsec / 1000);
if (ps->next_render != 0) {
log_trace("Schedule delay: %" PRIi64 " us",
draw_callback_enter_us - (int64_t)ps->next_render);
}
handle_pending_updates(EV_A_ ps);
int64_t after_handle_pending_updates_us;
clock_gettime(CLOCK_MONOTONIC, &now);
after_handle_pending_updates_us = (now.tv_sec * 1000000LL + now.tv_nsec / 1000);
log_trace("handle_pending_updates took: %" PRIi64 " us",
after_handle_pending_updates_us - draw_callback_enter_us);
if (ps->first_frame) {
// If we are still rendering the first frame, if some of the windows are
// unmapped/destroyed during the above handle_pending_updates() call, they
@@ -1518,15 +1815,21 @@ static void draw_callback_impl(EV_P_ session_t *ps, int revents attr_unused) {
ev_timer_start(EV_A_ & ps->fade_timer);
}
int64_t after_preprocess_us;
clock_gettime(CLOCK_MONOTONIC, &now);
after_preprocess_us = (now.tv_sec * 1000000LL + now.tv_nsec / 1000);
log_trace("paint_preprocess took: %" PRIi64 " us",
after_preprocess_us - after_handle_pending_updates_us);
// If the screen is unredirected, free all_damage to stop painting
if (ps->redirected && ps->o.stoppaint_force != ON) {
static int paint = 0;
log_trace("Render start, frame %d", paint);
if (!ps->o.legacy_backends) {
paint_all_new(ps, bottom, false);
paint_all_new(ps, bottom);
} else {
paint_all(ps, bottom, false);
paint_all(ps, bottom);
}
log_trace("Render end");
@@ -1544,18 +1847,21 @@ static void draw_callback_impl(EV_P_ session_t *ps, int revents attr_unused) {
// TODO(yshui) Investigate how big the X critical section needs to be. There are
// suggestions that rendering should be in the critical section as well.
// Queue redraw if animation is running. This should be picked up by next present
// event.
ps->redraw_needed = animation;
}
static void draw_callback(EV_P_ ev_timer *w, int revents) {
	// One-shot timer callback that renders a frame. This span contained both
	// the pre-merge ev_idle-based callback and the post-merge ev_timer-based
	// one interleaved; the timer version is kept, matching the draw_timer
	// used everywhere else in this commit (session_init, session_destroy).
	session_t *ps = session_ptr(w, draw_timer);
	draw_callback_impl(EV_A_ ps, revents);
	// Stop the timer; the next frame is scheduled by schedule_render when a
	// redraw is queued.
	ev_timer_stop(EV_A_ w);

	// Immediately start next frame if we are in benchmark mode.
	if (ps->o.benchmark) {
		ev_timer_set(w, 0, 0);
		ev_timer_start(EV_A_ w);
	}
}
@@ -1697,6 +2003,8 @@ static session_t *session_init(int argc, char **argv, Display *dpy,
.white_picture = XCB_NONE,
.shadow_context = NULL,
.last_msc = 0,
#ifdef CONFIG_VSYNC_DRM
.drm_fd = -1,
#endif
@@ -1716,6 +2024,7 @@ static session_t *session_init(int argc, char **argv, Display *dpy,
.randr_exists = 0,
.randr_event = 0,
.randr_error = 0,
.present_event_id = XCB_NONE,
.glx_exists = false,
.glx_event = 0,
.glx_error = 0,
@@ -1743,6 +2052,9 @@ static session_t *session_init(int argc, char **argv, Display *dpy,
ps->loop = EV_DEFAULT;
pixman_region32_init(&ps->screen_reg);
// TODO(yshui) investigate what's the best window size
render_statistics_init(&ps->render_stats, 128);
ps->pending_reply_tail = &ps->pending_reply_head;
ps->o.show_all_xerrors = all_xerrors;
@@ -2162,7 +2474,7 @@ static session_t *session_init(int argc, char **argv, Display *dpy,
ev_io_init(&ps->xiow, x_event_callback, ConnectionNumber(ps->dpy), EV_READ);
ev_io_start(ps->loop, &ps->xiow);
ev_init(&ps->unredir_timer, tmout_unredir_callback);
ev_idle_init(&ps->draw_idle, draw_callback);
ev_init(&ps->draw_timer, draw_callback);
ev_init(&ps->fade_timer, fade_timer_callback);
@@ -2264,6 +2576,31 @@ err:
free(ps);
return NULL;
}
void set_rr_scheduling(void) {
	// Try to give the compositor real-time (SCHED_RR) scheduling, bounded by
	// RLIMIT_RTPRIO and the policy's maximum priority. Failure is non-fatal.
	struct rlimit rlim;
	if (getrlimit(RLIMIT_RTPRIO, &rlim) != 0) {
		log_warn("Failed to get RLIMIT_RTPRIO, not setting real-time priority");
		return;
	}
	if (rlim.rlim_cur == 0) {
		// A zero limit means we may not request any real-time priority, and
		// priority 0 is invalid for SCHED_RR anyway — bail out early instead
		// of logging a confusing setschedparam failure.
		log_info("RLIMIT_RTPRIO is 0, not setting real-time priority");
		return;
	}

	// Clamp the requested priority: rlim_cur may be RLIM_INFINITY or exceed
	// the SCHED_RR maximum, and casting either straight to int is wrong.
	int max_priority = sched_get_priority_max(SCHED_RR);
	if (max_priority < 0) {
		log_debug("Failed to query maximum SCHED_RR priority");
		return;
	}
	int priority = max_priority;
	if (rlim.rlim_cur != RLIM_INFINITY && rlim.rlim_cur < (rlim_t)max_priority) {
		priority = (int)rlim.rlim_cur;
	}

	int old_policy;
	int ret;
	struct sched_param param;
	ret = pthread_getschedparam(pthread_self(), &old_policy, &param);
	if (ret != 0) {
		log_debug("Failed to get old scheduling priority");
		return;
	}
	param.sched_priority = priority;
	ret = pthread_setschedparam(pthread_self(), SCHED_RR, &param);
	if (ret != 0) {
		// %d with an int avoids the %lu/rlim_t format mismatch on platforms
		// where rlim_t is not unsigned long.
		log_info("Failed to set scheduling priority to %d", priority);
		return;
	}
	log_info("Set scheduling priority to %d", priority);
}
/**
* Destroy a session.
@@ -2378,6 +2715,8 @@ static void session_destroy(session_t *ps) {
free(ps->o.glx_fshader_win_str);
x_free_randr_info(ps);
render_statistics_destroy(&ps->render_stats);
// Release custom window shaders
free(ps->o.window_shader_fg);
struct shader_info *shader, *tmp;
@@ -2457,7 +2796,7 @@ static void session_destroy(session_t *ps) {
ev_timer_stop(ps->loop, &ps->unredir_timer);
ev_timer_stop(ps->loop, &ps->fade_timer);
ev_timer_stop(ps->loop, &ps->dpms_check_timer);
ev_idle_stop(ps->loop, &ps->draw_idle);
ev_timer_stop(ps->loop, &ps->draw_timer);
ev_prepare_stop(ps->loop, &ps->event_check);
ev_signal_stop(ps->loop, &ps->usr1_signal);
ev_signal_stop(ps->loop, &ps->int_signal);
@@ -2469,9 +2808,11 @@ static void session_destroy(session_t *ps) {
* @param ps current session
*/
static void session_run(session_t *ps) {
// In benchmark mode, we want draw_idle handler to always be active
set_rr_scheduling();
// In benchmark mode, we want draw_timer handler to always be active
if (ps->o.benchmark) {
ev_idle_start(ps->loop, &ps->draw_idle);
ev_timer_set(&ps->draw_timer, 0, 0);
ev_timer_start(ps->loop, &ps->draw_timer);
} else {
// Let's draw our first frame!
queue_redraw(ps);

View File

@@ -969,7 +969,7 @@ win_blur_background(session_t *ps, struct managed_win *w, xcb_render_picture_t t
/// paint all windows
/// region = ??
/// region_real = the damage region
void paint_all(session_t *ps, struct managed_win *t, bool ignore_damage) {
void paint_all(session_t *ps, struct managed_win *t) {
if (ps->o.xrender_sync_fence || (ps->drivers & DRIVER_NVIDIA)) {
if (ps->xsync_exists && !x_fence_sync(ps->c, ps->sync_fence)) {
log_error("x_fence_sync failed, xrender-sync-fence will be "
@@ -984,7 +984,7 @@ void paint_all(session_t *ps, struct managed_win *t, bool ignore_damage) {
region_t region;
pixman_region32_init(&region);
int buffer_age = get_buffer_age(ps);
if (buffer_age == -1 || buffer_age > ps->ndamage || ignore_damage) {
if (buffer_age == -1 || buffer_age > ps->ndamage) {
pixman_region32_copy(&region, &ps->screen_reg);
} else {
for (int i = 0; i < get_buffer_age(ps); i++) {

View File

@@ -37,7 +37,7 @@ void render(session_t *ps, int x, int y, int dx, int dy, int w, int h, int fullw
const glx_prog_main_t *pprogram, clip_t *clip);
void paint_one(session_t *ps, struct managed_win *w, const region_t *reg_paint);
void paint_all(session_t *ps, struct managed_win *const t, bool ignore_damage);
void paint_all(session_t *ps, struct managed_win *const t);
void free_picture(xcb_connection_t *c, xcb_render_picture_t *p);

100
src/statistics.c Normal file
View File

@@ -0,0 +1,100 @@
//! Rendering statistics
//!
//! Tracks how long it takes to render a frame, for measuring performance, and for pacing
//! the frames.
#include "statistics.h"
#include "log.h"
#include "utils.h"
void render_statistics_init(struct render_statistics *rs, int window_size) {
	// Initialize render statistics with a rolling window of `window_size`
	// render-time samples; the same size bounds the quantile estimator.
	*rs = (struct render_statistics){0};

	rolling_window_init(&rs->render_times, window_size);
	// Track the 98th percentile of render times, allowing the estimate's
	// rank to drift by 1% of the window before recomputing.
	rolling_quantile_init_with_tolerance(&rs->render_time_quantile, window_size,
	                                     /* q */ 0.98, /* tolerance */ 0.01);
}
void render_statistics_add_vblank_time_sample(struct render_statistics *rs, int time_us) {
	// Fold one measured vblank interval (microseconds) into the running
	// mean/variance estimate, with outlier rejection and sample capping.
	auto sample_sd = sqrt(cumulative_mean_and_var_get_var(&rs->vblank_time_us));
	auto current_estimate = render_statistics_get_vblank_time(rs);
	if (current_estimate != 0 && fabs((double)time_us - current_estimate) > sample_sd * 3) {
		// Deviated from the mean by more than 3 sigma (p < 0.003)
		log_debug("vblank time outlier: %d %f %f", time_us, rs->vblank_time_us.mean,
		          cumulative_mean_and_var_get_var(&rs->vblank_time_us));
		// An outlier sample, this could mean things like refresh rate changes, so
		// we reset the statistics. This could also be benign, but we like to be
		// cautious.
		cumulative_mean_and_var_init(&rs->vblank_time_us);
	}

	if (rs->vblank_time_us.mean != 0) {
		auto nframes_in_10_seconds =
		    (unsigned int)(10. * 1000000. / rs->vblank_time_us.mean);
		if (rs->vblank_time_us.n > 20 && rs->vblank_time_us.n > nframes_in_10_seconds) {
			// We collected 10 seconds worth of samples, we assume the
			// estimated refresh rate is stable. We will still reset the
			// statistics if we get an outlier sample though, see above.
			return;
		}
	}
	cumulative_mean_and_var_update(&rs->vblank_time_us, time_us);
}
void render_statistics_add_render_time_sample(struct render_statistics *rs, int time_us) {
	// Record one render-time sample. When the rolling window is full, the
	// evicted (oldest) sample must also leave the quantile estimator so the
	// two structures stay in sync.
	int evicted;
	bool was_full = rolling_window_push_back(&rs->render_times, time_us, &evicted);
	if (was_full) {
		rolling_quantile_pop_front(&rs->render_time_quantile, evicted);
	}
	rolling_quantile_push_back(&rs->render_time_quantile, time_us);
}
/// How much time budget we should give to the backend for rendering, in microseconds.
///
/// A `divisor` is also returned, indicating the target framerate. The divisor is
/// the number of vblanks we should wait between each frame. A divisor of 1 means
/// full framerate, 2 means half framerate, etc.
unsigned int
render_statistics_get_budget(struct render_statistics *rs, unsigned int *divisor) {
	if (rs->render_times.nelem < rs->render_times.window_size) {
		// No valid render time estimates yet. Assume maximum budget.
		*divisor = 1;
		return UINT_MAX;
	}

	// N-th percentile of render times, see render_statistics_init for N.
	auto render_time_percentile =
	    rolling_quantile_estimate(&rs->render_time_quantile, &rs->render_times);
	auto vblank_time_us = render_statistics_get_vblank_time(rs);
	if (vblank_time_us == 0) {
		// We don't have a good estimate of the vblank time yet, so we
		// assume we can finish in one vblank.
		*divisor = 1;
	} else {
		// Rendering typically takes this many vblanks; wait that many
		// between frames, i.e. target a proportionally lower frame rate.
		*divisor =
		    (unsigned int)(render_time_percentile / rs->vblank_time_us.mean + 1);
	}
	return (unsigned int)render_time_percentile;
}
unsigned int render_statistics_get_vblank_time(struct render_statistics *rs) {
	// Return the estimated mean vblank interval in microseconds, or 0 when
	// the estimate is not yet reliable.
	if (rs->vblank_time_us.n <= 20 || rs->vblank_time_us.mean < 100) {
		// Not enough samples yet, or the mean vblank time is implausibly
		// short (< 100 us); report 0 so callers treat the estimate as
		// unavailable.
		return 0;
	}
	return (unsigned int)rs->vblank_time_us.mean;
}
void render_statistics_reset(struct render_statistics *rs) {
	// Discard all collected samples and estimates, keeping allocations.
	rolling_window_reset(&rs->render_times);
	rolling_quantile_reset(&rs->render_time_quantile);
	cumulative_mean_and_var_init(&rs->vblank_time_us);
}
void render_statistics_destroy(struct render_statistics *rs) {
	// Release all resources owned by `rs`; it must be re-initialized with
	// render_statistics_init before being used again.
	render_statistics_reset(rs);
	rolling_window_destroy(&rs->render_times);
	rolling_quantile_destroy(&rs->render_time_quantile);
}

33
src/statistics.h Normal file
View File

@@ -0,0 +1,33 @@
#pragma once
#include "utils.h"
#define NTIERS (3)
struct render_statistics {
/// Rolling window of rendering times (in us) and the tiers they belong to.
/// We keep track of the tiers because the vblank time estimate can change over
/// time.
struct rolling_window render_times;
/// Estimate the 98-th percentile of rendering times
struct rolling_quantile render_time_quantile;
/// Time between each vblanks
struct cumulative_mean_and_var vblank_time_us;
};
void render_statistics_init(struct render_statistics *rs, int window_size);
void render_statistics_reset(struct render_statistics *rs);
void render_statistics_destroy(struct render_statistics *rs);
void render_statistics_add_vblank_time_sample(struct render_statistics *rs, int time_us);
void render_statistics_add_render_time_sample(struct render_statistics *rs, int time_us);
/// How much time budget we should give to the backend for rendering, in microseconds.
///
/// A `divisor` is also returned, indicating the target framerate. The divisor is
/// the number of vblanks we should wait between each frame. A divisor of 1 means
/// full framerate, 2 means half framerate, etc.
unsigned int
render_statistics_get_budget(struct render_statistics *rs, unsigned int *divisor);
unsigned int render_statistics_get_vblank_time(struct render_statistics *rs);

View File

@@ -48,7 +48,43 @@ int next_power_of_two(int n) {
return n;
}
/// Track the rolling maximum of a stream of integers.
void rolling_window_destroy(struct rolling_window *rw) {
	// Free the backing ring buffer; the window must be re-initialized with
	// rolling_window_init before reuse.
	free(rw->elem);
	rw->elem = NULL;
}
void rolling_window_reset(struct rolling_window *rw) {
	// Empty the window without freeing the backing storage.
	rw->nelem = 0;
	rw->elem_head = 0;
}
void rolling_window_init(struct rolling_window *rw, int size) {
	// Allocate storage for a window of `size` ints and start empty.
	// Allocation failure handling is delegated to ccalloc.
	rw->elem = ccalloc(size, int);
	rw->window_size = size;
	rolling_window_reset(rw);
}
int rolling_window_pop_front(struct rolling_window *rw) {
	// Remove and return the oldest element; the window must be non-empty.
	assert(rw->nelem > 0);
	int front = rw->elem[rw->elem_head];
	rw->elem_head = (rw->elem_head + 1) % rw->window_size;
	rw->nelem -= 1;
	return front;
}
bool rolling_window_push_back(struct rolling_window *rw, int val, int *front) {
	// Append `val` to the window. If the window is already full, the oldest
	// element is evicted first and stored in `*front`. Returns whether an
	// eviction happened (i.e. whether the window was full).
	bool evicted = false;
	if (rw->nelem == rw->window_size) {
		*front = rolling_window_pop_front(rw);
		evicted = true;
	}
	int tail = (rw->elem_head + rw->nelem) % rw->window_size;
	rw->elem[tail] = val;
	rw->nelem++;
	return evicted;
}
/// Track the maximum member of a FIFO queue of integers. Integers are pushed to the back
/// and popped from the front, the maximum of the current members in the queue is
/// tracked.
struct rolling_max {
/// A priority queue holding the indices of the maximum element candidates.
/// The head of the queue is the index of the maximum element.
@@ -59,32 +95,26 @@ struct rolling_max {
/// it's called the "original" indices.
int *p;
int p_head, np;
/// The elemets
int *elem;
int elem_head, nelem;
int window_size;
/// The maximum number of in flight elements.
int capacity;
};
void rolling_max_destroy(struct rolling_max *rm) {
	// NOTE(review): this frees both rm->elem and rm->p, but the rolling_max
	// rework in this merge appears to move element storage out into a
	// caller-managed rolling_window. Confirm `elem` is still a member of
	// struct rolling_max after the merge; otherwise the first free is a
	// pre-merge leftover and should be dropped.
	free(rm->elem);
	free(rm->p);
	free(rm);
}
struct rolling_max *rolling_max_new(int size) {
struct rolling_max *rolling_max_new(int capacity) {
auto rm = ccalloc(1, struct rolling_max);
if (!rm) {
return NULL;
}
rm->p = ccalloc(size, int);
rm->elem = ccalloc(size, int);
rm->window_size = size;
if (!rm->p || !rm->elem) {
rm->p = ccalloc(capacity, int);
if (!rm->p) {
goto err;
}
rm->capacity = capacity;
return rm;
@@ -96,33 +126,21 @@ err:
void rolling_max_reset(struct rolling_max *rm) {
	// Clear the candidate priority queue.
	rm->p_head = 0;
	rm->np = 0;
	// NOTE(review): nelem/elem_head look like pre-merge fields of
	// struct rolling_max — the reworked version seems to track only the
	// priority queue. Confirm these members still exist after the merge.
	rm->nelem = 0;
	rm->elem_head = 0;
}
void rolling_max_push(struct rolling_max *rm, int val) {
#define IDX(n) ((n) % rm->window_size)
if (rm->nelem == rm->window_size) {
auto old_head = rm->elem_head;
// Discard the oldest element.
// rm->elem.pop_front();
rm->nelem--;
rm->elem_head = IDX(rm->elem_head + 1);
// Remove discarded element from the priority queue too.
assert(rm->np);
if (rm->p[rm->p_head] == old_head) {
// rm->p.pop_front()
rm->p_head = IDX(rm->p_head + 1);
rm->np--;
}
#define IDX(n) ((n) % rm->capacity)
/// Remove the oldest element in the window. The caller must maintain the list of elements
/// themselves, i.e. the behavior is undefined if `front` does not match the oldest
/// element.
void rolling_max_pop_front(struct rolling_max *rm, int front) {
if (rm->p[rm->p_head] == front) {
// rm->p.pop_front()
rm->p_head = IDX(rm->p_head + 1);
rm->np--;
}
}
// Add the new element to the queue.
// rm->elem.push_back(val)
rm->elem[IDX(rm->elem_head + rm->nelem)] = val;
rm->nelem++;
void rolling_max_push_back(struct rolling_max *rm, int val) {
// Update the prority queue.
// Remove all elements smaller than the new element from the queue. Because
// the new element will become the maximum element before them, and since they
@@ -130,7 +148,7 @@ void rolling_max_push(struct rolling_max *rm, int val) {
// element, so they will never become the maximum element.
while (rm->np) {
int p_tail = IDX(rm->p_head + rm->np - 1);
if (rm->elem[rm->p[p_tail]] > val) {
if (rm->p[p_tail] > val) {
break;
}
// rm->p.pop_back()
@@ -138,108 +156,119 @@ void rolling_max_push(struct rolling_max *rm, int val) {
}
// Add the new element to the end of the queue.
// rm->p.push_back(rm->start_index + rm->nelem - 1)
rm->p[IDX(rm->p_head + rm->np)] = IDX(rm->elem_head + rm->nelem - 1);
assert(rm->np < rm->capacity);
rm->p[IDX(rm->p_head + rm->np)] = val;
rm->np++;
#undef IDX
}
#undef IDX
int rolling_max_get_max(struct rolling_max *rm) {
	// Return the maximum element currently tracked, or INT_MIN if empty.
	// The reworked rolling_max stores element values directly in the
	// priority queue `p` (rolling_max_push_back does `rm->p[...] = val`), so
	// the head of `p` is the maximum. The merge left a stale pre-merge
	// `return rm->elem[rm->p[rm->p_head]];` in front of this return, indexing
	// a removed `elem` array — it is dropped here.
	if (rm->np == 0) {
		return INT_MIN;
	}
	return rm->p[rm->p_head];
}
TEST_CASE(rolling_max_test) {
#define NELEM 15
struct rolling_window queue;
rolling_window_init(&queue, 3);
auto rm = rolling_max_new(3);
const int data[NELEM] = {1, 2, 3, 1, 4, 5, 2, 3, 6, 5, 4, 3, 2, 0, 0};
const int expected_max[NELEM] = {1, 2, 3, 3, 4, 5, 5, 5, 6, 6, 6, 5, 4, 3, 2};
int max[NELEM] = {0};
for (int i = 0; i < NELEM; i++) {
rolling_max_push(rm, data[i]);
int front;
bool full = rolling_window_push_back(&queue, data[i], &front);
if (full) {
rolling_max_pop_front(rm, front);
}
rolling_max_push_back(rm, data[i]);
max[i] = rolling_max_get_max(rm);
}
rolling_window_destroy(&queue);
rolling_max_destroy(rm);
TEST_TRUE(memcmp(max, expected_max, sizeof(max)) == 0);
#undef NELEM
}
/// A rolling average of a stream of integers.
struct rolling_avg {
/// The sum of the elements in the window.
int64_t sum;
// Find the k-th smallest element in an array.
int quickselect(int *elems, int nelem, int k) {
int l = 0, r = nelem; // [l, r) is the range of candidates
while (l != r) {
int pivot = elems[l];
int i = l, j = r;
while (i < j) {
while (i < j && elems[--j] >= pivot) {
}
elems[i] = elems[j];
while (i < j && elems[++i] <= pivot) {
}
elems[j] = elems[i];
}
elems[i] = pivot;
/// The elements in the window.
int *elem;
int head, nelem;
if (i == k) {
break;
}
int window_size;
};
struct rolling_avg *rolling_avg_new(int size) {
auto rm = ccalloc(1, struct rolling_avg);
if (!rm) {
return NULL;
if (i < k) {
l = i + 1;
} else {
r = i;
}
}
rm->elem = ccalloc(size, int);
rm->window_size = size;
if (!rm->elem) {
free(rm);
return NULL;
}
return rm;
return elems[k];
}
void rolling_avg_destroy(struct rolling_avg *rm) {
free(rm->elem);
free(rm);
void rolling_quantile_init(struct rolling_quantile *rq, int capacity, int mink, int maxk) {
*rq = (struct rolling_quantile){0};
rq->tmp_buffer = malloc(sizeof(int) * (size_t)capacity);
rq->capacity = capacity;
rq->min_target_rank = mink;
rq->max_target_rank = maxk;
}
void rolling_avg_reset(struct rolling_avg *ra) {
ra->sum = 0;
ra->nelem = 0;
ra->head = 0;
void rolling_quantile_init_with_tolerance(struct rolling_quantile *rq, int window_size,
                                          double target, double tolerance) {
	// Convenience wrapper: translate a target quantile ± tolerance into the
	// integer rank bounds expected by rolling_quantile_init.
	int min_rank = (int)((target - tolerance) * window_size);
	int max_rank = (int)((target + tolerance) * window_size);
	rolling_quantile_init(rq, window_size, min_rank, max_rank);
}
void rolling_avg_push(struct rolling_avg *ra, int val) {
if (ra->nelem == ra->window_size) {
// Discard the oldest element.
// rm->elem.pop_front();
ra->sum -= ra->elem[ra->head % ra->window_size];
ra->nelem--;
ra->head = (ra->head + 1) % ra->window_size;
void rolling_quantile_reset(struct rolling_quantile *rq) {
	// Invalidate the current estimate; the next rolling_quantile_estimate
	// call will recompute it from scratch (rank 0 falls outside the target
	// rank range for any non-trivial quantile).
	rq->current_rank = 0;
	rq->estimate = 0;
}
void rolling_quantile_destroy(struct rolling_quantile *rq) {
	// Free the scratch buffer used by quickselect during re-estimation.
	free(rq->tmp_buffer);
}
int rolling_quantile_estimate(struct rolling_quantile *rq, struct rolling_window *elements) {
if (rq->current_rank < rq->min_target_rank || rq->current_rank > rq->max_target_rank) {
if (elements->nelem != elements->window_size) {
return INT_MIN;
}
// Re-estimate the quantile.
assert(elements->nelem <= rq->capacity);
rolling_window_copy_to_array(elements, rq->tmp_buffer);
const int target_rank =
rq->min_target_rank + (rq->max_target_rank - rq->min_target_rank) / 2;
rq->estimate = quickselect(rq->tmp_buffer, elements->nelem, target_rank);
rq->current_rank = target_rank;
}
// Add the new element to the queue.
// rm->elem.push_back(val)
ra->elem[(ra->head + ra->nelem) % ra->window_size] = val;
ra->sum += val;
ra->nelem++;
return rq->estimate;
}
double rolling_avg_get_avg(struct rolling_avg *ra) {
if (ra->nelem == 0) {
return 0;
void rolling_quantile_push_back(struct rolling_quantile *rq, int x) {
if (x <= rq->estimate) {
rq->current_rank++;
}
return (double)ra->sum / (double)ra->nelem;
}
TEST_CASE(rolling_avg_test) {
#define NELEM 15
auto rm = rolling_avg_new(3);
const int data[NELEM] = {1, 2, 3, 1, 4, 5, 2, 3, 6, 5, 4, 3, 2, 0, 0};
const double expected_avg[NELEM] = {
1, 1.5, 2, 2, 8.0 / 3.0, 10.0 / 3.0, 11.0 / 3.0, 10.0 / 3.0,
11.0 / 3.0, 14.0 / 3.0, 5, 4, 3, 5.0 / 3.0, 2.0 / 3.0};
double avg[NELEM] = {0};
for (int i = 0; i < NELEM; i++) {
rolling_avg_push(rm, data[i]);
avg[i] = rolling_avg_get_avg(rm);
}
for (int i = 0; i < NELEM; i++) {
TEST_EQUAL(avg[i], expected_avg[i]);
void rolling_quantile_pop_front(struct rolling_quantile *rq, int x) {
	// An element is leaving the window. If it was at or below the current
	// estimate, the estimate's rank within the window decreases by one.
	if (x <= rq->estimate) {
		rq->current_rank--;
	}
}

View File

@@ -17,6 +17,7 @@
#include <time.h>
#include "compiler.h"
#include "log.h"
#include "types.h"
#define ARR_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
@@ -289,20 +290,97 @@ static inline void free_charpp(char **str) {
///
int next_power_of_two(int n);
struct rolling_window {
	// Backing ring buffer of up to `window_size` ints.
	int *elem;
	// Index of the oldest element, and the current element count.
	int elem_head, nelem;
	int window_size;
};
void rolling_window_destroy(struct rolling_window *rw);
void rolling_window_reset(struct rolling_window *rw);
void rolling_window_init(struct rolling_window *rw, int size);
int rolling_window_pop_front(struct rolling_window *rw);
bool rolling_window_push_back(struct rolling_window *rw, int val, int *front);
/// Copy the contents of the rolling window to an array. The array is assumed to
/// have enough space to hold the contents of the rolling window.
static inline void attr_unused rolling_window_copy_to_array(struct rolling_window *rw,
                                                            int *arr) {
	// Number of slots stored contiguously from elem_head to the end of the
	// ring buffer.
	size_t contiguous = (size_t)(rw->window_size - rw->elem_head);
	size_t total = (size_t)rw->nelem;
	if (total <= contiguous) {
		// No wrap-around: a single copy suffices.
		memcpy(arr, rw->elem + rw->elem_head, total * sizeof(int));
	} else {
		// The window wraps: copy the run at the end of the buffer, then
		// the part that wrapped around to the front.
		memcpy(arr, rw->elem + rw->elem_head, contiguous * sizeof(int));
		memcpy(arr + contiguous, rw->elem, (total - contiguous) * sizeof(int));
	}
}
struct rolling_max;
struct rolling_max *rolling_max_new(int window_size);
void rolling_max_free(struct rolling_max *rm);
struct rolling_max *rolling_max_new(int capacity);
void rolling_max_destroy(struct rolling_max *rm);
void rolling_max_reset(struct rolling_max *rm);
void rolling_max_push(struct rolling_max *rm, int val);
void rolling_max_pop_front(struct rolling_max *rm, int front);
void rolling_max_push_back(struct rolling_max *rm, int val);
int rolling_max_get_max(struct rolling_max *rm);
struct rolling_avg;
struct rolling_avg *rolling_avg_new(int window_size);
void rolling_avg_free(struct rolling_avg *ra);
void rolling_avg_reset(struct rolling_avg *ra);
void rolling_avg_push(struct rolling_avg *ra, int val);
double rolling_avg_get_avg(struct rolling_avg *ra);
/// Estimate the mean and variance of random variable X using Welford's online
/// algorithm.
struct cumulative_mean_and_var {
	// Running mean of all samples seen so far.
	double mean;
	// Sum of squared deviations from the running mean (Welford's M2).
	double m2;
	// Number of samples accumulated.
	unsigned int n;
};
static inline attr_unused void
cumulative_mean_and_var_init(struct cumulative_mean_and_var *cmv) {
	// Reset to an empty estimator (mean = 0, m2 = 0, n = 0).
	*cmv = (struct cumulative_mean_and_var){0};
}
static inline attr_unused void
cumulative_mean_and_var_update(struct cumulative_mean_and_var *cmv, double x) {
	// Welford's online update: fold sample `x` into the running mean and M2.
	if (cmv->n == UINT_MAX) {
		// We have too many elements, let's keep the mean and variance.
		// (Saturate instead of overflowing the sample counter.)
		return;
	}
	cmv->n++;
	double delta = x - cmv->mean;
	cmv->mean += delta / (double)cmv->n;
	cmv->m2 += delta * (x - cmv->mean);
}
static inline attr_unused double
cumulative_mean_and_var_get_var(struct cumulative_mean_and_var *cmv) {
	// Bessel-corrected sample variance; defined as 0 while fewer than two
	// samples have been collected.
	return cmv->n < 2 ? 0 : cmv->m2 / (double)(cmv->n - 1);
}
// Find the k-th smallest element in an array.
int quickselect(int *elems, int nelem, int k);
/// A naive quantile estimator.
///
/// Estimates the N-th percentile of a random variable X in a sliding window.
struct rolling_quantile {
	// Rank of `estimate` within the current window, updated incrementally on
	// push/pop.
	int current_rank;
	// Acceptable rank range; when current_rank drifts outside it, the
	// estimate is recomputed.
	int min_target_rank, max_target_rank;
	// The current quantile estimate.
	int estimate;
	// Maximum window size supported; also the size of tmp_buffer.
	int capacity;
	// Scratch space for quickselect during re-estimation.
	int *tmp_buffer;
};
void rolling_quantile_init(struct rolling_quantile *rq, int capacity, int mink, int maxk);
void rolling_quantile_init_with_tolerance(struct rolling_quantile *rq, int window_size,
double target, double tolerance);
void rolling_quantile_reset(struct rolling_quantile *rq);
void rolling_quantile_destroy(struct rolling_quantile *rq);
int rolling_quantile_estimate(struct rolling_quantile *rq, struct rolling_window *elements);
void rolling_quantile_push_back(struct rolling_quantile *rq, int x);
void rolling_quantile_pop_front(struct rolling_quantile *rq, int x);
// Some versions of the Android libc do not have timespec_get(), use
// clock_gettime() instead.

View File

@@ -1139,13 +1139,24 @@ static void win_determine_rounded_corners(session_t *ps, struct managed_win *w)
return;
}
// Don't round full screen windows & excluded windows
if ((w && win_is_fullscreen(ps, w)) ||
c2_match(ps, w, ps->o.rounded_corners_blacklist, NULL)) {
void *radius_override = NULL;
if (c2_match(ps, w, ps->o.corner_radius_rules, &radius_override)) {
log_debug("Matched corner rule! %d", w->corner_radius);
}
// Don't round full screen windows & excluded windows,
// unless we find a corner override in corner_radius_rules
if (!radius_override && ((w && win_is_fullscreen(ps, w)) ||
c2_match(ps, w, ps->o.rounded_corners_blacklist, NULL))) {
w->corner_radius = 0;
log_debug("Not rounding corners for window %#010x", w->base.id);
} else {
w->corner_radius = ps->o.corner_radius;
if (radius_override) {
w->corner_radius = (int)(long)radius_override;
} else {
w->corner_radius = ps->o.corner_radius;
}
log_debug("Rounding corners for window %#010x", w->base.id);
// Initialize the border color to an invalid value
w->border_col[0] = w->border_col[1] = w->border_col[2] =