shithub: wl9m

Download patch

ref: eb4c674ba9dacf2a9f30c9c4167b1e7077214995
author: glenda <glenda@fileserver>
date: Mon Jan 5 22:26:35 EST 2026

first commit

--- /dev/null
+++ b/README
@@ -1,0 +1,23 @@
+Build and run in this order: the Plan 9 side first, then the Linux side.
+
+On plan9, 
+
+	cd plan9
+	mk
+	6.out [port] 
+
+On linux, 
+
+	cd linux
+	nix build
+	./result/bin/p9wl <plan9-ip> port
+
+In the p9wl output you will see a line like this:
+
+	00:00:10.028 [p9wl.c:1255] WAYLAND_DISPLAY=wayland-0 (1024x768)
+
+Then, on Linux, start a Wayland client with that WAYLAND_DISPLAY value:
+
+	WAYLAND_DISPLAY=wayland-0 librewolf
+
+or whichever program you want to display on 9front through the proxy.
--- /dev/null
+++ b/linux/Makefile
@@ -1,0 +1,34 @@
+# Makefile for p9wl - Wayland compositor to Plan 9
+#
+# Install dependencies (Debian/Ubuntu):
+#   sudo apt install libwlroots-dev libwayland-dev libxkbcommon-dev libpixman-1-dev liblz4-dev zlib1g-dev
+# wlroots is often versioned (wlroots-0.17, wlroots-0.18). The newest installed one is
+# detected once at parse time; override with:  make WLROOTS_PKG=wlroots-0.18
+ifndef WLROOTS_PKG
+WLROOTS_PKG := $(shell pkg-config --exists wlroots && echo wlroots || \
+                       pkg-config --list-all 2>/dev/null | grep -o 'wlroots-[0-9.]*' | sort -V | tail -1)
+endif
+ifeq ($(strip $(WLROOTS_PKG)),)
+  $(error Could not find wlroots. Install libwlroots-dev or specify WLROOTS_PKG=...)
+endif
+
+# Mandatory flags stay out of CFLAGS so `make CFLAGS=...` cannot drop them; libraries go
+# in LDLIBS, after the source on the link line, so --as-needed linkers do not discard them.
+PKGS := $(WLROOTS_PKG) wayland-server xkbcommon pixman-1
+CFLAGS ?= -Wall -g -O2
+P9WL_CPPFLAGS := -D_DEFAULT_SOURCE -DUSE_LZ4 -DWLR_USE_UNSTABLE $(shell pkg-config --cflags $(PKGS))
+LDLIBS := $(shell pkg-config --libs $(PKGS)) -llz4 -lz -lpthread -lm
+
+all: p9wl
+
+p9wl: p9wl.c
+	@echo "Building with $(WLROOTS_PKG)"
+	$(CC) $(P9WL_CPPFLAGS) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LDLIBS)
+
+drawclient: drawclient.c
+	$(CC) -Wall -O2 -o $@ $< -lm
+
+clean:
+	$(RM) p9wl drawclient
+
+.PHONY: all clean
--- /dev/null
+++ b/linux/flake.nix
@@ -1,0 +1,107 @@
+{
+  description = "p9wl - Wayland compositor proxy to Plan 9";
+
+  inputs.nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
+  inputs.flake-utils.url = "github:numtide/flake-utils";
+
+  outputs = { self, nixpkgs, flake-utils }:
+    flake-utils.lib.eachDefaultSystem (system:
+      let
+        pkgs = nixpkgs.legacyPackages.${system};
+
+        # xdg-shell protocol XML from wayland-protocols. The package build and the
+        # dev shell both feed it to wayland-scanner to generate C sources.
+        xdgShellXml =
+          "${pkgs.wayland-protocols}/share/wayland-protocols/stable/xdg-shell/xdg-shell.xml";
+
+        p9wl = pkgs.stdenv.mkDerivation {
+          pname = "p9wl";
+          version = "0.1.0";
+
+          src = ./.;
+
+          # Tools executed during the build.
+          nativeBuildInputs = [ pkgs.pkg-config pkgs.wayland-scanner pkgs.gnumake ];
+
+          # Libraries and protocol data the build is given.
+          buildInputs = [
+            pkgs.wlroots
+            pkgs.wayland
+            pkgs.wayland-protocols
+            pkgs.libxkbcommon
+            pkgs.pixman
+            pkgs.lz4
+            pkgs.zlib
+          ];
+
+          # Runs via `runHook preBuild` at the start of the buildPhase below.
+          preBuild = ''
+            # Generate xdg-shell protocol header
+            wayland-scanner server-header \
+              ${xdgShellXml} \
+              xdg-shell-protocol.h
+
+            wayland-scanner private-code \
+              ${xdgShellXml} \
+              xdg-shell-protocol.c
+          '';
+
+          # Just use the Makefile
+          buildPhase = ''
+            runHook preBuild
+            make
+            runHook postBuild
+          '';
+
+          # Only the p9wl binary is installed.
+          installPhase = ''
+            runHook preInstall
+            mkdir -p $out/bin
+            cp p9wl $out/bin/
+            runHook postInstall
+          '';
+
+          meta = {
+            description = "Wayland compositor that proxies display to Plan 9";
+            license = pkgs.lib.licenses.mit;
+            platforms = pkgs.lib.platforms.linux;
+            mainProgram = "p9wl";
+          };
+        };
+
+      in {
+        # `default` and `p9wl` name the same derivation.
+        packages = {
+          inherit p9wl;
+          default = p9wl;
+        };
+
+        # Shell with the package's build inputs plus a compiler and debugger.
+        devShells.default = pkgs.mkShell {
+          inputsFrom = [ p9wl ];
+          packages = [ pkgs.gcc pkgs.gdb pkgs.wayland-scanner ];
+
+          shellHook = ''
+            # Generate protocol headers for development
+            if [ ! -f xdg-shell-protocol.h ]; then
+              echo "Generating xdg-shell-protocol.h..."
+              wayland-scanner server-header \
+                ${xdgShellXml} \
+                xdg-shell-protocol.h
+            fi
+
+            if [ ! -f xdg-shell-protocol.c ]; then
+              echo "Generating xdg-shell-protocol.c..."
+              wayland-scanner private-code \
+                ${xdgShellXml} \
+                xdg-shell-protocol.c
+            fi
+
+            echo "p9wl development shell"
+            echo "  Build: make"
+            echo "  Run:   ./p9wl <plan9-ip> [port]"
+          '';
+        };
+      }
+    );
+}
--- /dev/null
+++ b/linux/p9wl.c
@@ -1,0 +1,1272 @@
+/*
+ * p9wl.c - Wayland compositor with UDP transport to Plan 9
+ *
+ * Based on the working TCP version of p9wl, converted to UDP.
+ * Uses damage tracking from wlroots, LZ4 compression, NO XOR delta.
+ *
+ * Build by hand (the Makefile in this directory passes the same -D flags):
+ *   gcc -O2 -D_DEFAULT_SOURCE -o p9wl p9wl.c \
+ *       $(pkg-config --cflags --libs wlroots-0.19 wayland-server xkbcommon pixman-1) \
+ *       -llz4 -lm -DWLR_USE_UNSTABLE
+ *
+ * For wlroots-0.18:
+ *   gcc -O2 -D_DEFAULT_SOURCE -o p9wl p9wl.c \
+ *       $(pkg-config --cflags --libs wlroots-0.18 wayland-server xkbcommon pixman-1) \
+ *       -llz4 -lm -DWLR_USE_UNSTABLE
+ */
+
+#define _POSIX_C_SOURCE 200809L
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <poll.h>
+#include <time.h>
+#include <signal.h>
+#include <linux/input-event-codes.h>
+#include <pixman.h>
+#include <lz4.h>
+
+#include <wayland-server-core.h>
+#include <wlr/backend.h>
+#include <wlr/backend/headless.h>
+#include <wlr/render/allocator.h>
+#include <wlr/render/wlr_renderer.h>
+#include <wlr/types/wlr_buffer.h>
+#include <wlr/types/wlr_compositor.h>
+#include <wlr/types/wlr_cursor.h>
+#include <wlr/types/wlr_data_device.h>
+#include <wlr/types/wlr_output.h>
+#include <wlr/types/wlr_output_layout.h>
+#include <wlr/types/wlr_scene.h>
+#include <wlr/types/wlr_seat.h>
+#include <wlr/types/wlr_subcompositor.h>
+#include <wlr/types/wlr_xcursor_manager.h>
+#include <wlr/types/wlr_xdg_shell.h>
+#include <wlr/types/wlr_keyboard.h>
+#include <wlr/interfaces/wlr_keyboard.h>
+#include <wlr/util/log.h>
+#include <xkbcommon/xkbcommon.h>
+
+/* UDP protocol framing; multi-byte fields are stored low byte first (see PUT16/PUT32) */
+#define P9WL_MAGIC        0x50395749   /* ASCII 'P' '9' 'W' 'I' as one 32-bit value */
+#define P9WL_VERSION      1
+#define P9WL_HDR_SIZE     20           /* bytes in the header written by build_header() */
+#define P9WL_MTU          1400
+#define P9WL_MAX_CHUNK    (P9WL_MTU - 44)   /* most payload bytes put into one tile packet */
+#define TILE_SIZE         16           /* tile edge length in pixels */
+
+/* Message types */
+#define P9WL_SIZE_REQ     0x01
+#define P9WL_SIZE_RESP    0x02
+#define P9WL_TILE         0x10   /* tile pixels, sent by send_tile() / send_tile_with_copies() */
+#define P9WL_FLUSH        0x15   /* header-only packet sent by send_flush() */
+#define P9WL_NACK         0x16
+#define P9WL_SCROLL       0x17
+#define P9WL_MOUSE        0x20
+#define P9WL_KEY          0x21
+#define P9WL_RESIZE       0x22
+
+/* Flags */
+#define P9WL_FLAG_COMPRESSED  0x01   /* tile payload is LZ4-compressed */
+#define P9WL_FLAG_HAS_COPIES  0x02   /* a copy-position list follows the tile payload */
+
+/* Header field offsets in bytes (byte 7 has no name; build_header() writes it as 0) */
+#define HDR_MAGIC     0
+#define HDR_VERSION   4
+#define HDR_TYPE      5
+#define HDR_FLAGS     6
+#define HDR_SEQ       8
+#define HDR_FRAME     12
+#define HDR_TIMESTAMP 16
+
+/* Configuration constants */
+#define FULL_FRAME_INTERVAL_MS   5000   /* With no damage, force a full frame once this many ms have passed */
+#define MIN_BATCH_DELAY_US       1    /* Minimum delay between batch packets */
+#define PACKET_DELAY_US          0      /* usleep() argument after every packet in send_pkt() */
+#define COMPRESSION_THRESHOLD    75     /* Compress if result < 75% of original */
+#define BATCH_INTERVAL_MS        33     /* Interval in ms over which a batch of tiles is spread */
+#define SIZE_REQ_ATTEMPTS        10     /* Number of size request retries */
+#define SIZE_REQ_TIMEOUT_MS      1000   /* Timeout per size request attempt */
+#define MAX_SCREEN_DIM           8192   /* Maximum screen dimension */
+/* Little-endian load/store macros. Arguments are expanded several times: keep them free of side effects. */
+#define GET16(p) ((uint16_t)(p)[0] | ((uint16_t)(p)[1]<<8))
+#define GET32(p) ((uint32_t)(p)[0] | ((uint32_t)(p)[1]<<8) | ((uint32_t)(p)[2]<<16) | ((uint32_t)(p)[3]<<24))
+#define PUT16(p, v) do { (p)[0]=(uint8_t)(v); (p)[1]=(uint8_t)((v)>>8); } while(0)
+#define PUT32(p, v) do { (p)[0]=(uint8_t)(v); (p)[1]=(uint8_t)((v)>>8); (p)[2]=(uint8_t)((v)>>16); (p)[3]=(uint8_t)((v)>>24); } while(0)
+
+/* Plan 9 special-key runes: KF|n values in the 0xF000 range, except Kdel which is 0x7F */
+enum {
+    KF = 0xF000,
+    Khome = KF|13, Kup = KF|14, Kpgup = KF|15,
+    Kleft = KF|17, Kright = KF|18,
+    Kdown = KF|19, Kend = KF|20, Kpgdown = KF|21,
+    Kins = KF|22, Kdel = 0x7F,
+};
+/* Rune -> evdev keycode table used by handle_key(); shift = 1 sends the key with Shift held. */
+/* The final {0, 0, 0} entry ends the table: lookups stop at the first keycode of 0. */
+struct key_map { int rune; int keycode; int shift; };
+static const struct key_map keymap[] = {
+    {8, KEY_BACKSPACE, 0}, {9, KEY_TAB, 0}, {10, KEY_ENTER, 0}, {13, KEY_ENTER, 0},
+    {27, KEY_ESC, 0}, {Kdel, KEY_DELETE, 0},
+    {Khome, KEY_HOME, 0}, {Kup, KEY_UP, 0}, {Kpgup, KEY_PAGEUP, 0},
+    {Kleft, KEY_LEFT, 0}, {Kright, KEY_RIGHT, 0},
+    {Kdown, KEY_DOWN, 0}, {Kend, KEY_END, 0}, {Kpgdown, KEY_PAGEDOWN, 0},
+    {Kins, KEY_INSERT, 0},
+    {'a', KEY_A, 0}, {'b', KEY_B, 0}, {'c', KEY_C, 0}, {'d', KEY_D, 0},
+    {'e', KEY_E, 0}, {'f', KEY_F, 0}, {'g', KEY_G, 0}, {'h', KEY_H, 0},
+    {'i', KEY_I, 0}, {'j', KEY_J, 0}, {'k', KEY_K, 0}, {'l', KEY_L, 0},
+    {'m', KEY_M, 0}, {'n', KEY_N, 0}, {'o', KEY_O, 0}, {'p', KEY_P, 0},
+    {'q', KEY_Q, 0}, {'r', KEY_R, 0}, {'s', KEY_S, 0}, {'t', KEY_T, 0},
+    {'u', KEY_U, 0}, {'v', KEY_V, 0}, {'w', KEY_W, 0}, {'x', KEY_X, 0},
+    {'y', KEY_Y, 0}, {'z', KEY_Z, 0},
+    {'A', KEY_A, 1}, {'B', KEY_B, 1}, {'C', KEY_C, 1}, {'D', KEY_D, 1},
+    {'E', KEY_E, 1}, {'F', KEY_F, 1}, {'G', KEY_G, 1}, {'H', KEY_H, 1},
+    {'I', KEY_I, 1}, {'J', KEY_J, 1}, {'K', KEY_K, 1}, {'L', KEY_L, 1},
+    {'M', KEY_M, 1}, {'N', KEY_N, 1}, {'O', KEY_O, 1}, {'P', KEY_P, 1},
+    {'Q', KEY_Q, 1}, {'R', KEY_R, 1}, {'S', KEY_S, 1}, {'T', KEY_T, 1},
+    {'U', KEY_U, 1}, {'V', KEY_V, 1}, {'W', KEY_W, 1}, {'X', KEY_X, 1},
+    {'Y', KEY_Y, 1}, {'Z', KEY_Z, 1},
+    {'0', KEY_0, 0}, {'1', KEY_1, 0}, {'2', KEY_2, 0}, {'3', KEY_3, 0},
+    {'4', KEY_4, 0}, {'5', KEY_5, 0}, {'6', KEY_6, 0}, {'7', KEY_7, 0},
+    {'8', KEY_8, 0}, {'9', KEY_9, 0},
+    {' ', KEY_SPACE, 0}, {'!', KEY_1, 1}, {'@', KEY_2, 1}, {'#', KEY_3, 1},
+    {'$', KEY_4, 1}, {'%', KEY_5, 1}, {'^', KEY_6, 1}, {'&', KEY_7, 1},
+    {'*', KEY_8, 1}, {'(', KEY_9, 1}, {')', KEY_0, 1},
+    {'-', KEY_MINUS, 0}, {'_', KEY_MINUS, 1}, {'=', KEY_EQUAL, 0}, {'+', KEY_EQUAL, 1},
+    {'[', KEY_LEFTBRACE, 0}, {'{', KEY_LEFTBRACE, 1},
+    {']', KEY_RIGHTBRACE, 0}, {'}', KEY_RIGHTBRACE, 1},
+    {'\\', KEY_BACKSLASH, 0}, {'|', KEY_BACKSLASH, 1},
+    {';', KEY_SEMICOLON, 0}, {':', KEY_SEMICOLON, 1},
+    {'\'', KEY_APOSTROPHE, 0}, {'"', KEY_APOSTROPHE, 1},
+    {',', KEY_COMMA, 0}, {'<', KEY_COMMA, 1},
+    {'.', KEY_DOT, 0}, {'>', KEY_DOT, 1},
+    {'/', KEY_SLASH, 0}, {'?', KEY_SLASH, 1},
+    {'`', KEY_GRAVE, 0}, {'~', KEY_GRAVE, 1},
+    {0, 0, 0}
+};
+
+/* FNV-1a style hash over `count` 32-bit pixels: XOR each word in, then multiply by the FNV prime. */
+static uint32_t hash_tile(uint32_t *pixels, int count) {
+    const uint32_t fnv_prime = 16777619u;
+    uint32_t acc = 2166136261u;  /* FNV offset basis */
+    int i = 0;
+    while (i < count)
+        acc = (acc ^ pixels[i++]) * fnv_prime;
+    return acc;
+}
+ 
+ 
+
+/* Tile group for deduplication - tiles with same hash */
+#define MAX_COPIES_PER_GROUP 4096  /* element count of each copy_x / copy_y array */
+struct tile_group {
+    uint32_t hash;       /* Hash shared by the tiles of this group */
+    int primary_idx;     /* First tile index (the one we'll actually send) */
+    int *copy_x;         /* Pixel x of each copy; allocated on first use in send_frame() */
+    int *copy_y;         /* Pixel y of each copy; allocated together with copy_x */
+    int num_copies;      /* Additional copies (not including primary) */
+    int copy_capacity;   /* Not read or written by the code visible in this part of the file */
+};
+
+#define MAX_GROUPS 4096  /* length of tile_groups.groups */
+struct tile_groups {
+    struct tile_group groups[MAX_GROUPS];
+    int num_groups;         /* Groups in use for the frame being sent */
+    uint32_t *tile_hashes;  /* Hash for each tile index */
+    int *tile_group;        /* Which group each tile belongs to (-1 if none) */
+};
+
+struct toplevel {  /* state kept for one xdg toplevel */
+    struct wl_list link;                 /* list node; NOTE(review): presumably linked into server.toplevels - confirm */
+    struct wlr_xdg_toplevel *xdg;
+    struct wlr_scene_tree *scene_tree;
+    struct wl_listener map, unmap, commit, destroy;  /* their handlers are not in this part of the file */
+    struct server *server;               /* pointer back to the server state */
+};
+
+struct server {  /* compositor, transport and frame state shared by the functions in this file */
+    struct wl_display *display;
+    struct wlr_backend *backend;
+    struct wlr_renderer *renderer;
+    struct wlr_allocator *allocator;
+    struct wlr_scene *scene;
+    struct wlr_scene_output *scene_output;
+    struct wlr_output_layout *output_layout;
+    struct wlr_output *output;
+    struct wlr_xdg_shell *xdg_shell;
+    struct wlr_seat *seat;
+    struct wlr_cursor *cursor;
+    struct wlr_xcursor_manager *cursor_mgr;
+    struct wlr_keyboard virtual_kb;   /* keyboard set on the seat before handle_key() injects keys */
+
+    struct wl_listener new_output, output_frame, output_destroy;
+    struct wl_listener new_xdg_toplevel;
+    struct wl_listener new_input;
+    struct wl_list toplevels;
+
+    int udp_fd;                       /* socket that send_pkt() transmits on */
+    struct sockaddr_in p9_addr;       /* destination address of every packet */
+
+    int width, height;                /* screen size in pixels; width is also the framebuf row stride */
+    uint32_t send_seq, frame_seq;     /* per-packet and per-frame counters written into each header */
+
+    uint32_t *framebuf;               /* current frame, one 32-bit XRGB pixel per element */
+    uint32_t *prev_framebuf;          /* pixels as last sent; send_frame() compares framebuf against it */
+    uint32_t *tilebuf;      /* For XRGB->XBGR conversion */
+    char *compbuf;                    /* LZ4 output buffer */
+    int compbuf_size;                 /* capacity of compbuf in bytes */
+    uint8_t *sendbuf;                 /* scratch buffer in which tile packets are assembled */
+
+    int tiles_x, tiles_y;             /* tile grid size: columns, rows */
+
+    /* Damage tracking: pixel box, x1/y1 inclusive and x2/y2 exclusive */
+    int damage_x1, damage_y1, damage_x2, damage_y2;
+    int has_damage;                   /* cleared by send_frame() once the frame's tiles are collected */
+    int force_full_frame;             /* send every tile on the next frame; cleared by send_frame() */
+    uint32_t last_full_frame_ms;      /* now_ms() value recorded at the last full frame */
+
+    /* Paced batch sending */
+    struct wl_event_source *batch_timer;  /* re-armed at the end of send_frame() */
+    int *pending_tiles;      /* Tile indices to send */
+    int num_pending;         /* How many tiles pending */
+    int batch_idx;           /* Current index in pending_tiles */
+    int batch_active;        /* Currently sending a batch */
+    uint32_t batch_start_us; /* When batch started (microseconds) */
+    int batch_interval_us;   /* Total time for batch (microseconds) */
+
+    /* Tile deduplication */
+    struct tile_groups dedup;
+
+    /* Global frame counter (unified across functions); incremented on every send_frame() call */
+    int global_frame_count;
+};
+/* Monotonic time in milliseconds, reduced modulo 2^32 (wraps about every 49.7 days). */
+static uint32_t now_ms(void) {
+    struct timespec ts;
+    clock_gettime(CLOCK_MONOTONIC, &ts);
+    return (uint32_t)((uint64_t)ts.tv_sec * 1000u + (uint64_t)ts.tv_nsec / 1000000u);
+}
+/* Monotonic clock reading in microseconds, as a 64-bit value. */
+static uint64_t now_us(void) {
+    struct timespec now;
+    clock_gettime(CLOCK_MONOTONIC, &now);
+    return now.tv_nsec / 1000 + (uint64_t)now.tv_sec * 1000000;
+}
+/* Write the 20-byte packet header at pkt: magic, version, type, flags, a zero byte, seq, frame, time. */
+/* The clock is read once up front: PUT32 expands its value argument four times, so */
+/* passing now_ms() to it directly could build the timestamp from different clock ticks. */
+static void build_header(uint8_t *pkt, uint8_t type, uint8_t flags, uint32_t seq, uint32_t frame) {
+    uint32_t stamp = now_ms();
+    PUT32(pkt + HDR_MAGIC, P9WL_MAGIC);
+    pkt[HDR_VERSION] = P9WL_VERSION;
+    pkt[HDR_TYPE] = type;
+    pkt[HDR_FLAGS] = flags;
+    pkt[7] = 0;  /* byte between the flags and the sequence number, always zero */
+    PUT32(pkt + HDR_SEQ, seq);
+    PUT32(pkt + HDR_FRAME, frame);
+    PUT32(pkt + HDR_TIMESTAMP, stamp);
+}
+
+/* Transmit one datagram to p9_addr, then pause for PACKET_DELAY_US; the sendto() result is not checked. */
+static void send_pkt(struct server *s, uint8_t *pkt, int len) {
+    const struct sockaddr *peer = (const struct sockaddr *)&s->p9_addr;
+    socklen_t peer_len = sizeof(s->p9_addr);
+    (void)sendto(s->udp_fd, pkt, len, 0, peer, peer_len);
+    usleep(PACKET_DELAY_US);  /* delay after every packet to prevent receiver buffer overflow */
+}
+/* Emit a header-only P9WL_FLUSH packet for the current frame, using up one sequence number. */
+static void send_flush(struct server *s) {
+    uint8_t hdr[P9WL_HDR_SIZE];
+    build_header(hdr, P9WL_FLUSH, 0, s->send_seq++, s->frame_seq);
+    send_pkt(s, hdr, sizeof(hdr));
+}
+/*
+ * Send tile (tx, ty) as one or more P9WL_TILE packets and copy its pixels into
+ * prev_framebuf, so later frames are compared against what was sent.
+ *
+ * Each packet is a 20-byte header followed by: x, y, w, h, chunk index, chunk
+ * count (16-bit each), total payload size, this chunk's size (32-bit each), data.
+ *
+ * Returns 1 if the tile was sent, 0 if (tx, ty) is outside the tile grid or empty.
+ */
+static int send_tile(struct server *s, int tx, int ty) {
+    if (tx < 0 || ty < 0 || tx >= s->tiles_x || ty >= s->tiles_y)
+        return 0;
+
+    /* Tiles in the last column/row are clipped to the screen edge. */
+    const int x = tx * TILE_SIZE, y = ty * TILE_SIZE;
+    const int w = (tx == s->tiles_x - 1) ? (s->width - x) : TILE_SIZE;
+    const int h = (ty == s->tiles_y - 1) ? (s->height - y) : TILE_SIZE;
+    if (w <= 0 || h <= 0)
+        return 0;
+
+    /* Pack the tile into tilebuf with R and B swapped, and snapshot each row into prev. */
+    for (int r = 0; r < h; r++) {
+        int base = (y + r) * s->width + x;
+        const uint32_t *src = s->framebuf + base;
+        uint32_t *out = s->tilebuf + r * w;
+        for (int c = 0; c < w; c++) {
+            uint32_t px = src[c];
+            uint32_t red = (px >> 16) & 0xFF, blue = px & 0xFF;
+            out[c] = (px & 0xFF00FF00) | red | (blue << 16);
+        }
+        memcpy(s->prev_framebuf + base, src, w * 4);
+    }
+
+    /* Use the LZ4 form only when it is under COMPRESSION_THRESHOLD percent of raw. */
+    const int raw_size = w * h * 4;
+    const int lz_size = LZ4_compress_default((char *)s->tilebuf, s->compbuf,
+                                             raw_size, s->compbuf_size);
+    const int use_lz = lz_size > 0 && lz_size < (raw_size * COMPRESSION_THRESHOLD) / 100;
+    const uint8_t *payload = use_lz ? (const uint8_t *)s->compbuf : (const uint8_t *)s->tilebuf;
+    const int payload_len = use_lz ? lz_size : raw_size;
+    const uint8_t flags = use_lz ? P9WL_FLAG_COMPRESSED : 0;
+
+    /* Split the payload into chunks of at most P9WL_MAX_CHUNK bytes, one packet each. */
+    const int nchunks = (payload_len + P9WL_MAX_CHUNK - 1) / P9WL_MAX_CHUNK;
+    int sent = 0;
+    for (int idx = 0; idx < nchunks; idx++) {
+        int left = payload_len - sent;
+        int this_len = (idx == nchunks - 1) ? left : P9WL_MAX_CHUNK;
+        uint8_t *pkt = s->sendbuf;
+
+        build_header(pkt, P9WL_TILE, flags, s->send_seq++, s->frame_seq);
+        PUT16(pkt + 20, x);                /* tile origin */
+        PUT16(pkt + 22, y);
+        PUT16(pkt + 24, w);                /* tile size */
+        PUT16(pkt + 26, h);
+        PUT16(pkt + 28, idx);              /* chunk index, then chunk count */
+        PUT16(pkt + 30, nchunks);
+        PUT32(pkt + 32, payload_len);      /* total payload bytes for the tile */
+        PUT32(pkt + 36, this_len);         /* payload bytes in this packet */
+        memcpy(pkt + 40, payload + sent, this_len);
+
+        send_pkt(s, pkt, 40 + this_len);
+        sent += this_len;
+    }
+    return 1;
+}
+
+/*
+ * Send the tile at grid position (tx, ty) once, together with `num_copies` extra
+ * pixel positions (copy_x[i], copy_y[i]) where the receiver should paint the same
+ * bitmap. prev_framebuf is updated for the primary tile and for every copy.
+ *
+ * Wire format: each packet repeats the whole bitmap for one position (single
+ * chunk), optionally followed by a 16-bit count and that many (x, y) pairs of
+ * 16-bit positions, flagged with P9WL_FLAG_HAS_COPIES. As many positions as fit
+ * below P9WL_MAX_CHUNK are attached; the rest go into further packets.
+ *
+ * Positions are read straight from copy_x/copy_y instead of being copied into a
+ * temporary heap array, so sending cannot fail half-way for lack of memory.
+ */
+static void send_tile_with_copies(struct server *s, int tx, int ty, 
+                                   int *copy_x, int *copy_y, int num_copies) {
+    /* Bounds check */
+    if (tx < 0 || tx >= s->tiles_x || ty < 0 || ty >= s->tiles_y)
+        return;
+
+    int x = tx * TILE_SIZE;
+    int y = ty * TILE_SIZE;
+    int w = (tx == s->tiles_x - 1) ? (s->width - x) : TILE_SIZE;
+    int h = (ty == s->tiles_y - 1) ? (s->height - y) : TILE_SIZE;
+    if (w <= 0 || h <= 0) return;
+
+    /* Convert XRGB->XBGR and update prev for primary tile */
+    for (int row = 0; row < h; row++) {
+        uint32_t *curr = &s->framebuf[(y + row) * s->width + x];
+        uint32_t *prev = &s->prev_framebuf[(y + row) * s->width + x];
+        uint32_t *dst = &s->tilebuf[row * w];
+        for (int col = 0; col < w; col++) {
+            uint32_t p = curr[col];
+            dst[col] = (p & 0xFF00FF00) | ((p >> 16) & 0xFF) | ((p & 0xFF) << 16);
+        }
+        memcpy(prev, curr, w * 4);
+    }
+
+    /*
+     * Also update prev for copy destinations. A w x h rectangle that does not lie
+     * inside the screen is left alone: copying it would run past the framebuffer,
+     * and leaving prev stale just means the tile is detected as changed again.
+     */
+    for (int c = 0; c < num_copies; c++) {
+        int cx = copy_x[c];
+        int cy = copy_y[c];
+        if (cx < 0 || cy < 0 || cx + w > s->width || cy + h > s->height)
+            continue;
+        for (int row = 0; row < h; row++) {
+            uint32_t *curr = &s->framebuf[(cy + row) * s->width + cx];
+            uint32_t *prev = &s->prev_framebuf[(cy + row) * s->width + cx];
+            memcpy(prev, curr, w * 4);
+        }
+    }
+
+    int raw_size = w * h * 4;
+    int comp_size = LZ4_compress_default((char*)s->tilebuf, s->compbuf, raw_size, s->compbuf_size);
+    /* Use the compressed form only when it is clearly smaller than the raw tile. */
+    uint8_t *data;
+    int data_size;
+    int compressed = 0;
+    if (comp_size > 0 && comp_size < (raw_size * COMPRESSION_THRESHOLD) / 100) {
+        data = (uint8_t*)s->compbuf;
+        data_size = comp_size;
+        compressed = 1;
+    } else {
+        data = (uint8_t*)s->tilebuf;
+        data_size = raw_size;
+    }
+
+    /* Copy positions per packet. Packet: header(40) + data + count(2) + positions(4 each) */
+    int avail = P9WL_MAX_CHUNK - data_size - 2;
+    int copies_per_pkt = avail / 4;
+    if (copies_per_pkt < 0) copies_per_pkt = 0;
+
+    /*
+     * Positions are numbered 0..num_copies: 0 is the primary tile, n >= 1 is
+     * copy n - 1. Each packet paints one position and lists up to
+     * copies_per_pkt further ones.
+     */
+    int total_positions = 1 + num_copies;
+    int pos_idx = 0;
+    while (pos_idx < total_positions) {
+        int primary_x = (pos_idx == 0) ? x : copy_x[pos_idx - 1];
+        int primary_y = (pos_idx == 0) ? y : copy_y[pos_idx - 1];
+        pos_idx++;
+
+        /* How many additional positions in this packet? */
+        int remaining = total_positions - pos_idx;
+        int batch = (remaining > copies_per_pkt) ? copies_per_pkt : remaining;
+
+        int flags = (compressed ? P9WL_FLAG_COMPRESSED : 0) |
+                    (batch > 0 ? P9WL_FLAG_HAS_COPIES : 0);
+
+        uint8_t *pkt = s->sendbuf;
+        build_header(pkt, P9WL_TILE, flags, s->send_seq++, s->frame_seq);
+        PUT16(pkt + 20, primary_x);
+        PUT16(pkt + 22, primary_y);
+        PUT16(pkt + 24, w);
+        PUT16(pkt + 26, h);
+        PUT16(pkt + 28, 0);           /* chunk index */
+        PUT16(pkt + 30, 1);           /* chunk count: always a single chunk */
+        PUT32(pkt + 32, data_size);
+        PUT32(pkt + 36, data_size);
+        memcpy(pkt + 40, data, data_size);
+
+        int pkt_len = 40 + data_size;
+        if (batch > 0) {
+            PUT16(pkt + pkt_len, batch);
+            pkt_len += 2;
+            for (int c = 0; c < batch; c++) {
+                /* pos_idx >= 1 here, so position pos_idx + c is copy pos_idx + c - 1 */
+                PUT16(pkt + pkt_len, copy_x[pos_idx + c - 1]);
+                PUT16(pkt + pkt_len + 2, copy_y[pos_idx + c - 1]);
+                pkt_len += 4;
+            }
+            pos_idx += batch;
+        }
+
+        send_pkt(s, pkt, pkt_len);
+    }
+}
+
+/* Turn a row-major tile index (tiles_x tiles per row) into grid coordinates and send that tile. */
+static int send_tile_idx(struct server *s, int tile_idx) {
+    const int cols = s->tiles_x;
+    const int row = tile_idx / cols, col = tile_idx % cols;
+    return send_tile(s, col, row);  /* 1 if sent, as reported by send_tile() */
+}
+
+/* Release every group's copy lists and the per-tile arrays; all freed pointers are reset to NULL. */
+static void free_tile_groups(struct tile_groups *dedup) {
+    for (struct tile_group *grp = dedup->groups; grp < dedup->groups + MAX_GROUPS; grp++) {
+        free(grp->copy_x);
+        free(grp->copy_y);
+        grp->copy_x = grp->copy_y = NULL;
+    }
+
+    free(dedup->tile_hashes);
+    dedup->tile_hashes = NULL;
+    free(dedup->tile_group);
+    dedup->tile_group = NULL;
+    dedup->num_groups = 0;
+}
+/* Timer callback (presumably for server.batch_timer - confirm at registration): sends the flush packet. */
+static int batch_timer_cb(void *data) {
+    struct server *srv = data;
+    send_flush(srv);
+    return 0;
+}
+/* Flush delay for a frame of num_tiles tiles: 5 plus 1 per 100 tiles, capped at 30. */
+static int calc_flush_delay(int num_tiles) {
+    const int ms = num_tiles / 100 + 5;
+    return ms < 30 ? ms : 30;
+}
+
+/* Pixel rectangle of the tile with linear index idx; last-column/row tiles are clipped. */
+static void sf_tile_rect(const struct server *s, int idx, int *x, int *y, int *w, int *h) {
+    int tx = idx % s->tiles_x;
+    int ty = idx / s->tiles_x;
+    *x = tx * TILE_SIZE;
+    *y = ty * TILE_SIZE;
+    *w = (tx == s->tiles_x - 1) ? (s->width - *x) : TILE_SIZE;
+    *h = (ty == s->tiles_y - 1) ? (s->height - *y) : TILE_SIZE;
+}
+
+/* Returns 1 if tiles a and b have the same size and identical framebuf pixels, else 0. */
+static int sf_tiles_identical(const struct server *s, int a, int b) {
+    int ax, ay, aw, ah, bx, by, bw, bh;
+    sf_tile_rect(s, a, &ax, &ay, &aw, &ah);
+    sf_tile_rect(s, b, &bx, &by, &bw, &bh);
+    if (aw != bw || ah != bh)
+        return 0;
+    for (int row = 0; row < ah; row++) {
+        const uint32_t *pa = &s->framebuf[(ay + row) * s->width + ax];
+        const uint32_t *pb = &s->framebuf[(by + row) * s->width + bx];
+        if (memcmp(pa, pb, aw * 4) != 0)
+            return 0;
+    }
+    return 1;
+}
+
+/*
+ * Send the current frame: every tile for a full frame, otherwise the tiles inside
+ * the damage box that differ from prev_framebuf. Then arm batch_timer.
+ *
+ * A full frame is forced on the first call and, while there is no damage, once
+ * FULL_FRAME_INTERVAL_MS has passed since the previous one.
+ *
+ * Tiles with identical content are grouped and sent once with a list of copy
+ * positions. A matching hash only nominates a group: the pixels are compared
+ * before a tile joins it, so a hash collision cannot paint the wrong bitmap.
+ * A tile that cannot be grouped (group table full, or no memory for a copy
+ * list) is sent on its own instead of being silently dropped.
+ */
+static void send_frame(struct server *s) {
+    int total_tiles = s->tiles_x * s->tiles_y;
+
+    s->global_frame_count++;
+    if (s->global_frame_count <= 1)
+        s->force_full_frame = 1;  /* first frame: ensure clean startup */
+
+    if (!s->has_damage && !s->force_full_frame) {
+        /* Idle: only the periodic full frame (clears stuck artifacts) gets through */
+        if (now_ms() - s->last_full_frame_ms <= FULL_FRAME_INTERVAL_MS)
+            return;
+        s->force_full_frame = 1;
+    }
+    if (s->force_full_frame)
+        s->last_full_frame_ms = now_ms();
+    s->frame_seq++;
+
+    /* Collect tiles that need sending into pending_tiles array */
+    s->num_pending = 0;
+    for (int tile_idx = 0; tile_idx < total_tiles; tile_idx++) {
+        int x, y, w, h;
+        sf_tile_rect(s, tile_idx, &x, &y, &w, &h);
+        if (w <= 0 || h <= 0) continue;
+
+        /* Skip tiles that lie entirely outside the damage box */
+        if (!s->force_full_frame && s->has_damage &&
+            (x + w <= s->damage_x1 || x >= s->damage_x2 ||
+             y + h <= s->damage_y1 || y >= s->damage_y2))
+            continue;
+
+        /* Check if changed */
+        int changed = s->force_full_frame;
+        for (int row = 0; row < h && !changed; row++) {
+            int off = (y + row) * s->width + x;
+            changed = memcmp(&s->framebuf[off], &s->prev_framebuf[off], w * 4) != 0;
+        }
+        if (changed)
+            s->pending_tiles[s->num_pending++] = tile_idx;
+    }
+
+    if (s->global_frame_count <= 10 || (s->global_frame_count % 60 == 0))
+        wlr_log(WLR_INFO, "Frame %d: sending %d/%d tiles (force=%d damage=%d)", 
+                s->global_frame_count, s->num_pending, total_tiles, s->force_full_frame, s->has_damage);
+
+    s->has_damage = 0;
+    s->force_full_frame = 0;
+    if (s->num_pending == 0)
+        return;
+
+    /* Allocate hash tracking if needed */
+    if (!s->dedup.tile_hashes) {
+        s->dedup.tile_hashes = calloc(total_tiles, sizeof(uint32_t));
+        s->dedup.tile_group = malloc(total_tiles * sizeof(int));
+        if (!s->dedup.tile_hashes || !s->dedup.tile_group) {
+            wlr_log(WLR_ERROR, "Failed to allocate dedup arrays");
+            free(s->dedup.tile_hashes);
+            free(s->dedup.tile_group);
+            s->dedup.tile_hashes = NULL;
+            s->dedup.tile_group = NULL;
+            /* Fallback: send tiles without deduplication */
+            for (int i = 0; i < s->num_pending; i++)
+                send_tile_idx(s, s->pending_tiles[i]);
+            wl_event_source_timer_update(s->batch_timer, calc_flush_delay(s->num_pending));
+            return;
+        }
+    }
+
+    /* Hash each pending tile straight from framebuf (XRGB) and mark it ungrouped */
+    for (int i = 0; i < s->num_pending; i++) {
+        int tile_idx = s->pending_tiles[i];
+        int x, y, w, h;
+        sf_tile_rect(s, tile_idx, &x, &y, &w, &h);
+
+        uint32_t hash = 2166136261u;
+        for (int row = 0; row < h; row++) {
+            uint32_t *src = &s->framebuf[(y + row) * s->width + x];
+            for (int col = 0; col < w; col++) {
+                hash ^= src[col];
+                hash *= 16777619u;
+            }
+        }
+        s->dedup.tile_hashes[tile_idx] = hash;
+        s->dedup.tile_group[tile_idx] = -1;
+    }
+
+    /* Reset groups; copy arrays allocated for earlier frames are kept for reuse */
+    for (int g = 0; g < s->dedup.num_groups; g++)
+        s->dedup.groups[g].num_copies = 0;
+    s->dedup.num_groups = 0;
+
+    /* Group pending tiles by content */
+    for (int i = 0; i < s->num_pending; i++) {
+        int tile_idx = s->pending_tiles[i];
+        uint32_t hash = s->dedup.tile_hashes[tile_idx];
+
+        /* A group is usable only if it has room and its pixels really match */
+        struct tile_group *grp = NULL;
+        for (int g = 0; g < s->dedup.num_groups; g++) {
+            struct tile_group *cand = &s->dedup.groups[g];
+            if (cand->hash == hash && cand->num_copies < MAX_COPIES_PER_GROUP &&
+                sf_tiles_identical(s, cand->primary_idx, tile_idx)) {
+                grp = cand;
+                break;
+            }
+        }
+
+        if (grp) {
+            /* Add as copy to existing group */
+            if (!grp->copy_x) {
+                grp->copy_x = malloc(MAX_COPIES_PER_GROUP * sizeof(int));
+                grp->copy_y = malloc(MAX_COPIES_PER_GROUP * sizeof(int));
+                if (!grp->copy_x || !grp->copy_y) {
+                    free(grp->copy_x);
+                    free(grp->copy_y);
+                    grp->copy_x = NULL;
+                    grp->copy_y = NULL;
+                    send_tile_idx(s, tile_idx);  /* no copy list: send it on its own */
+                    continue;
+                }
+            }
+            grp->copy_x[grp->num_copies] = (tile_idx % s->tiles_x) * TILE_SIZE;
+            grp->copy_y[grp->num_copies] = (tile_idx / s->tiles_x) * TILE_SIZE;
+            grp->num_copies++;
+            s->dedup.tile_group[tile_idx] = (int)(grp - s->dedup.groups);
+        } else if (s->dedup.num_groups < MAX_GROUPS) {
+            /* Create new group */
+            grp = &s->dedup.groups[s->dedup.num_groups];
+            grp->hash = hash;
+            grp->primary_idx = tile_idx;
+            grp->num_copies = 0;
+            s->dedup.tile_group[tile_idx] = s->dedup.num_groups;
+            s->dedup.num_groups++;
+        } else {
+            send_tile_idx(s, tile_idx);  /* group table full: send without dedup */
+        }
+    }
+
+    /* Send each group */
+    for (int g = 0; g < s->dedup.num_groups; g++) {
+        struct tile_group *grp = &s->dedup.groups[g];
+        int tile_idx = grp->primary_idx;
+        int tx = tile_idx % s->tiles_x;
+        int ty = tile_idx / s->tiles_x;
+
+        if (grp->num_copies > 0) {
+            send_tile_with_copies(s, tx, ty, grp->copy_x, grp->copy_y, grp->num_copies);
+        } else {
+            send_tile(s, tx, ty);
+        }
+    }
+
+    wl_event_source_timer_update(s->batch_timer, calc_flush_delay(s->dedup.num_groups));
+}
+
+/*
+ * Inject one Plan 9 rune as a key press followed by a release on the seat.
+ * A rune listed in keymap is sent as that key, with Shift held if the entry says so.
+ * Other control characters 1..26 are sent as Ctrl+<letter>; the letter's keycode is
+ * looked up in keymap because evdev codes are not alphabetical (KEY_A + 1 is KEY_S).
+ * Any other rune is ignored.
+ */
+static void handle_key(struct server *s, int rune) {
+    const struct key_map *km = NULL;
+    uint32_t modifier = 0;
+    for (const struct key_map *m = keymap; m->keycode; m++) {
+        if (m->rune == rune) { km = m; break; }
+    }
+    if (km) {
+        modifier = km->shift ? WLR_MODIFIER_SHIFT : 0;
+    } else if (rune >= 1 && rune <= 26) {
+        /* Ctrl+letter for unmapped control characters */
+        int letter = 'a' + rune - 1;
+        for (const struct key_map *m = keymap; m->keycode; m++) {
+            if (m->rune == letter) { km = m; break; }
+        }
+        modifier = WLR_MODIFIER_CTRL;
+    }
+    if (!km) return;
+
+    uint32_t t = now_ms();
+    wlr_seat_set_keyboard(s->seat, &s->virtual_kb);
+    if (modifier) {
+        struct wlr_keyboard_modifiers mods = { .depressed = modifier };
+        wlr_seat_keyboard_notify_modifiers(s->seat, &mods);
+    }
+    wlr_seat_keyboard_notify_key(s->seat, t, km->keycode, WL_KEYBOARD_KEY_STATE_PRESSED);
+    wlr_seat_keyboard_notify_key(s->seat, t+1, km->keycode, WL_KEYBOARD_KEY_STATE_RELEASED);
+    if (modifier) {
+        struct wlr_keyboard_modifiers mods = {0};
+        wlr_seat_keyboard_notify_modifiers(s->seat, &mods);
+    }
+}
+/* Apply one mouse report: pointer position (mx, my) in screen pixels plus a button bitmask. */
+static void handle_mouse(struct server *s, int mx, int my, int buttons) {
+    static int last_buttons = 0;  /* bitmask from the previous call */
+    static const struct { int bit; uint32_t code; } button_map[] = { {1, BTN_LEFT}, {2, BTN_MIDDLE}, {4, BTN_RIGHT} };
+    const int toggled = buttons ^ last_buttons;
+    wlr_cursor_warp_absolute(s->cursor, NULL, (double)mx / s->width, (double)my / s->height);
+    /* Find the surface, if any, under the cursor's new position. */
+    double sx, sy;
+    struct wlr_surface *target = NULL;
+    struct wlr_scene_node *hit = wlr_scene_node_at(&s->scene->tree.node,
+                                                    s->cursor->x, s->cursor->y, &sx, &sy);
+    if (hit && hit->type == WLR_SCENE_NODE_BUFFER) {
+        struct wlr_scene_surface *ss =
+            wlr_scene_surface_try_from_buffer(wlr_scene_buffer_from_node(hit));
+        if (ss) target = ss->surface;
+    }
+
+    const uint32_t t = now_ms();
+    if (!target) {
+        wlr_seat_pointer_clear_focus(s->seat);
+    } else {
+        wlr_seat_pointer_notify_enter(s->seat, target, sx, sy);
+        wlr_seat_pointer_notify_motion(s->seat, t, sx, sy);
+        /* A new left-button press also sends keyboard enter for this surface. */
+        if ((toggled & 1) && (buttons & 1))
+            wlr_seat_keyboard_notify_enter(s->seat, target, s->virtual_kb.keycodes,
+                s->virtual_kb.num_keycodes, &s->virtual_kb.modifiers);
+    }
+
+    /* Report only the buttons whose state changed since the last call. */
+    for (size_t i = 0; i < sizeof(button_map) / sizeof(button_map[0]); i++) {
+        if (toggled & button_map[i].bit)
+            wlr_seat_pointer_notify_button(s->seat, t, button_map[i].code, (buttons & button_map[i].bit)
+                ? WL_POINTER_BUTTON_STATE_PRESSED : WL_POINTER_BUTTON_STATE_RELEASED);
+    }
+
+    /* Bits 8 and 16 emit a vertical scroll event on every report that has them set. */
+    if (buttons & 8) wlr_seat_pointer_notify_axis(s->seat, t, WL_POINTER_AXIS_VERTICAL_SCROLL,
+        -15, -1, WL_POINTER_AXIS_SOURCE_WHEEL, WL_POINTER_AXIS_RELATIVE_DIRECTION_IDENTICAL);
+    if (buttons & 16) wlr_seat_pointer_notify_axis(s->seat, t, WL_POINTER_AXIS_VERTICAL_SCROLL,
+        15, 1, WL_POINTER_AXIS_SOURCE_WHEEL, WL_POINTER_AXIS_RELATIVE_DIRECTION_IDENTICAL);
+    last_buttons = buttons;
+    wlr_seat_pointer_notify_frame(s->seat);
+}
+
+/*
+ * Switch the server to a new framebuffer size of nw x nh pixels.
+ *
+ * Requests outside 1..MAX_SCREEN_DIM or equal to the current size are
+ * ignored.  The replacement buffers are allocated before any existing state
+ * is touched, so an allocation failure leaves the server running at its old
+ * size with its old buffers instead of with NULL buffers and mismatched
+ * dimensions.  On success the dedup arrays are rebuilt, a full frame is
+ * forced, the output mode is updated and every toplevel is told its new
+ * size.
+ */
+static void do_resize(struct server *s, int nw, int nh) {
+    if (nw <= 0 || nh <= 0 || nw > MAX_SCREEN_DIM || nh > MAX_SCREEN_DIM) return;
+    if (nw == s->width && nh == s->height) return;
+
+    wlr_log(WLR_INFO, "Resize: %dx%d -> %dx%d", s->width, s->height, nw, nh);
+
+    int tiles_x = (nw + TILE_SIZE - 1) / TILE_SIZE;
+    int tiles_y = (nh + TILE_SIZE - 1) / TILE_SIZE;
+    size_t total_tiles = (size_t)tiles_x * (size_t)tiles_y;
+    size_t pixels = (size_t)nw * (size_t)nh;
+
+    /* Allocate the replacements first; nothing is committed until all succeed. */
+    void *framebuf = calloc(pixels, 4);
+    void *prev_framebuf = calloc(pixels, 4);
+    void *pending_tiles = malloc(total_tiles * sizeof(int));
+    if (!framebuf || !prev_framebuf || !pending_tiles) {
+        wlr_log(WLR_ERROR, "Resize allocation failed");
+        free(framebuf);
+        free(prev_framebuf);
+        free(pending_tiles);
+        return;
+    }
+
+    /* Commit: swap in the new buffers and geometry together. */
+    free(s->framebuf);
+    free(s->prev_framebuf);
+    free(s->pending_tiles);
+    s->framebuf = framebuf;
+    s->prev_framebuf = prev_framebuf;
+    s->pending_tiles = pending_tiles;
+    /* s->tilebuf holds a single TILE_SIZE x TILE_SIZE tile, so it is size-independent. */
+
+    s->width = nw;
+    s->height = nh;
+    s->tiles_x = tiles_x;
+    s->tiles_y = tiles_y;
+
+    /* Queued tile indices referred to the old grid and the old array. */
+    s->num_pending = 0;
+
+    /* Free and reallocate dedup arrays */
+    free_tile_groups(&s->dedup);
+    s->dedup.tile_hashes = calloc(total_tiles, sizeof(uint32_t));
+    s->dedup.tile_group = malloc(total_tiles * sizeof(int));
+    /* Note: These can be NULL, send_frame handles that case */
+
+    s->force_full_frame = 1;
+
+    if (s->output) {
+        struct wlr_output_state state;
+        wlr_output_state_init(&state);
+        wlr_output_state_set_enabled(&state, true);
+        wlr_output_state_set_custom_mode(&state, nw, nh, 60000);
+        wlr_output_commit_state(s->output, &state);
+        wlr_output_state_finish(&state);
+    }
+
+    struct toplevel *tl;
+    wl_list_for_each(tl, &s->toplevels, link) {
+        if (tl->xdg) {
+            wlr_xdg_toplevel_set_size(tl->xdg, nw, nh);
+            wlr_xdg_toplevel_set_maximized(tl->xdg, true);
+        }
+    }
+}
+
+/*
+ * Validate one datagram from the Plan 9 side and dispatch it by type.
+ *
+ * Packets shorter than the common header, with the wrong magic, or with a
+ * different protocol version are dropped (the version mismatch is logged at
+ * debug level).  Mouse and key packets need at least 28 bytes, resize
+ * packets at least 24; shorter ones and unknown types are ignored.
+ */
+static void handle_packet(struct server *s, uint8_t *buf, size_t len) {
+    if (len < P9WL_HDR_SIZE || GET32(buf + HDR_MAGIC) != P9WL_MAGIC)
+        return;
+
+    if (buf[HDR_VERSION] != P9WL_VERSION) {
+        wlr_log(WLR_DEBUG, "Protocol version mismatch: got %d, want %d",
+                buf[HDR_VERSION], P9WL_VERSION);
+        return;
+    }
+
+    const uint8_t *payload = buf + 20;
+    const uint8_t type = buf[HDR_TYPE];
+
+    if (type == P9WL_MOUSE) {
+        /* x, y as signed 16-bit values, then one byte of button bits */
+        if (len < 28) return;
+        handle_mouse(s, (int16_t)GET16(payload), (int16_t)GET16(payload + 2), payload[4]);
+    } else if (type == P9WL_KEY) {
+        /* rune as a 32-bit value */
+        if (len < 28) return;
+        handle_key(s, (int32_t)GET32(payload));
+    } else if (type == P9WL_RESIZE) {
+        /* width, height as unsigned 16-bit values */
+        if (len < 24) return;
+        do_resize(s, GET16(payload), GET16(payload + 2));
+    }
+}
+
+/*
+ * Event-loop callback for the UDP socket: drain every datagram that is
+ * currently queued (non-blocking) and pass each one to handle_packet().
+ * The fd and mask arguments are unused; the socket is taken from the
+ * server.  Always returns 0.
+ */
+static int handle_udp(int fd, uint32_t mask, void *data) {
+    struct server *srv = data;
+    uint8_t dgram[2048];
+    ssize_t got;
+
+    while ((got = recv(srv->udp_fd, dgram, sizeof(dgram), MSG_DONTWAIT)) > 0)
+        handle_packet(srv, dgram, got);
+
+    return 0;
+}
+
+/*
+ * Ask the Plan 9 side for its window size.
+ *
+ * Sends a SIZE_REQ to `addr` up to SIZE_REQ_ATTEMPTS times, waiting
+ * SIZE_REQ_TIMEOUT_MS for a reply after each one.  A reply is accepted only
+ * if it carries the protocol magic, the expected protocol version, the
+ * SIZE_RESP type and a non-zero width and height; anything else is ignored
+ * and the request is retried.
+ *
+ * Returns 0 and stores the size in *w / *h on success.  Returns -1 with
+ * *w / *h untouched when no usable reply arrived.
+ */
+static int get_size(int udp_fd, struct sockaddr_in *addr, int *w, int *h) {
+    uint8_t pkt[24];
+    PUT32(pkt + HDR_MAGIC, P9WL_MAGIC);
+    pkt[HDR_VERSION] = P9WL_VERSION;
+    pkt[HDR_TYPE] = P9WL_SIZE_REQ;
+    pkt[HDR_FLAGS] = 0;
+    pkt[7] = 0;
+    PUT32(pkt + HDR_SEQ, 0);
+    PUT32(pkt + HDR_FRAME, 0);
+    PUT32(pkt + HDR_TIMESTAMP, now_ms());
+    PUT16(pkt + 20, 0);
+    PUT16(pkt + 22, 0);
+
+    for (int attempt = 0; attempt < SIZE_REQ_ATTEMPTS; attempt++) {
+        wlr_log(WLR_INFO, "SIZE_REQ attempt %d/%d", attempt + 1, SIZE_REQ_ATTEMPTS);
+        sendto(udp_fd, pkt, 24, 0, (struct sockaddr*)addr, sizeof(*addr));
+
+        struct pollfd pfd = { .fd = udp_fd, .events = POLLIN };
+        if (poll(&pfd, 1, SIZE_REQ_TIMEOUT_MS) <= 0)
+            continue;
+
+        uint8_t buf[64];
+        ssize_t n = recv(udp_fd, buf, sizeof(buf), 0);
+        if (n < 24 || GET32(buf + HDR_MAGIC) != P9WL_MAGIC)
+            continue;
+        /* Same version gate as handle_packet() applies to every other packet. */
+        if (buf[HDR_VERSION] != P9WL_VERSION || buf[HDR_TYPE] != P9WL_SIZE_RESP)
+            continue;
+
+        int rw = GET16(buf + 20);
+        int rh = GET16(buf + 22);
+        /* The size is later used as a divisor and an allocation size. */
+        if (rw == 0 || rh == 0)
+            continue;
+
+        *w = rw;
+        *h = rh;
+        wlr_log(WLR_INFO, "SIZE_RESP: %dx%d", *w, *h);
+        return 0;
+    }
+    return -1;
+}
+
+/* Toplevel callbacks */
+/* Map listener: size the toplevel to the whole screen, mark it maximized,
+ * and give its surface keyboard focus. */
+static void tl_map(struct wl_listener *l, void *d) {
+    struct toplevel *top = wl_container_of(l, top, map);
+    struct server *srv = top->server;
+    struct wlr_keyboard *kb = &srv->virtual_kb;
+
+    wlr_xdg_toplevel_set_size(top->xdg, srv->width, srv->height);
+    wlr_xdg_toplevel_set_maximized(top->xdg, true);
+    wlr_seat_keyboard_notify_enter(srv->seat, top->xdg->base->surface,
+        kb->keycodes, kb->num_keycodes, &kb->modifiers);
+}
+/* Unmap listener: nothing to do; it exists so the listener can be
+ * registered in new_toplevel() and removed again in tl_destroy(). */
+static void tl_unmap(struct wl_listener *l, void *d) {
+    (void)l;
+    (void)d;
+}
+/* Commit listener: on the surface's initial commit, configure the toplevel
+ * to the full screen size and mark it maximized.  Later commits are ignored. */
+static void tl_commit(struct wl_listener *l, void *d) {
+    struct toplevel *top = wl_container_of(l, top, commit);
+
+    if (!top->xdg->base->initial_commit)
+        return;
+
+    struct server *srv = top->server;
+    wlr_xdg_toplevel_set_size(top->xdg, srv->width, srv->height);
+    wlr_xdg_toplevel_set_maximized(top->xdg, true);
+}
+/* Destroy listener: unhook all four listeners, drop the toplevel from the
+ * server's list, and release its bookkeeping struct. */
+static void tl_destroy(struct wl_listener *l, void *d) {
+    struct toplevel *top = wl_container_of(l, top, destroy);
+    struct wl_list *links[] = {
+        &top->map.link,
+        &top->unmap.link,
+        &top->commit.link,
+        &top->destroy.link,
+        &top->link,
+    };
+
+    for (size_t i = 0; i < sizeof(links) / sizeof(links[0]); i++)
+        wl_list_remove(links[i]);
+    free(top);
+}
+
+/*
+ * xdg-shell "new toplevel" listener.
+ *
+ * Allocates the per-toplevel bookkeeping struct, creates the scene tree for
+ * the xdg surface, hooks the map/unmap/commit/destroy listeners and adds the
+ * toplevel to the server's list.  If either the struct or the scene tree
+ * cannot be created, the failure is logged and the toplevel is left
+ * untracked rather than registered with a NULL scene tree.
+ */
+static void new_toplevel(struct wl_listener *l, void *d) {
+    struct server *s = wl_container_of(l, s, new_xdg_toplevel);
+    struct wlr_xdg_toplevel *xdg = d;
+
+    struct toplevel *tl = calloc(1, sizeof(*tl));
+    if (!tl) {
+        wlr_log(WLR_ERROR, "Failed to allocate toplevel");
+        return;
+    }
+    tl->server = s;
+    tl->xdg = xdg;
+
+    tl->scene_tree = wlr_scene_xdg_surface_create(&s->scene->tree, xdg->base);
+    if (!tl->scene_tree) {
+        wlr_log(WLR_ERROR, "Failed to create scene tree for toplevel");
+        free(tl);
+        return;
+    }
+    xdg->base->data = tl->scene_tree;
+
+    tl->map.notify = tl_map;
+    wl_signal_add(&xdg->base->surface->events.map, &tl->map);
+    tl->unmap.notify = tl_unmap;
+    wl_signal_add(&xdg->base->surface->events.unmap, &tl->unmap);
+    tl->commit.notify = tl_commit;
+    wl_signal_add(&xdg->base->surface->events.commit, &tl->commit);
+    tl->destroy.notify = tl_destroy;
+    wl_signal_add(&xdg->events.destroy, &tl->destroy);
+
+    wl_list_insert(&s->toplevels, &tl->link);
+}
+
+/*
+ * Common tail of every output_frame() path that got as far as building an
+ * output state: commit the state, release it, and tell the scene's clients
+ * that the frame is done (timestamped with CLOCK_MONOTONIC).
+ */
+static void finish_output_frame(struct server *s, struct wlr_scene_output *so,
+                                struct wlr_output_state *state) {
+    wlr_output_commit_state(s->output, state);
+    wlr_output_state_finish(state);
+
+    struct timespec now;
+    clock_gettime(CLOCK_MONOTONIC, &now);
+    wlr_scene_output_send_frame_done(so, &now);
+}
+
+/*
+ * Output "frame" listener: render the scene, copy the resulting pixels into
+ * s->framebuf and hand them to send_frame().
+ *
+ * - If the scene state cannot be built, the framebuffer is filled with gray
+ *   and sent as a forced full frame.
+ * - The extents of the reported damage are recorded in s->damage_* for
+ *   send_frame().
+ * - Frames with no damage (and no forced full frame), with no buffer, with a
+ *   buffer of the wrong size, or whose pixels cannot be mapped are committed
+ *   and acknowledged but not sent.
+ *
+ * Diagnostic logging is limited to the first few frames.
+ */
+static void output_frame(struct wl_listener *listener, void *data) {
+    /* wl_container_of() is pointer arithmetic on the listener; s is never NULL. */
+    struct server *s = wl_container_of(listener, s, output_frame);
+
+    s->global_frame_count++;
+    if (!s->scene_output || !s->framebuf || !s->output) {
+        if (s->global_frame_count <= 5)
+            wlr_log(WLR_ERROR, "output_frame: null pointer");
+        return;
+    }
+
+    struct wlr_scene_output *so = s->scene_output;
+    struct wlr_output_state state;
+    wlr_output_state_init(&state);
+
+    struct wlr_scene_output_state_options opts = {0};
+
+    if (!wlr_scene_output_build_state(so, &state, &opts)) {
+        wlr_output_state_finish(&state);
+        /* Fill with gray on failure */
+        for (int i = 0; i < s->width * s->height; i++)
+            s->framebuf[i] = 0xFF303030;
+        s->force_full_frame = 1;
+        if (s->global_frame_count <= 5)
+            wlr_log(WLR_INFO, "output_frame %d: build_state failed, sending gray", s->global_frame_count);
+        send_frame(s);
+        return;
+    }
+
+    /* Extract damage */
+    s->has_damage = 0;
+    if (state.committed & WLR_OUTPUT_STATE_DAMAGE) {
+        pixman_box32_t *ext = pixman_region32_extents(&state.damage);
+        if (ext->x2 > ext->x1 && ext->y2 > ext->y1) {
+            s->damage_x1 = ext->x1;
+            s->damage_y1 = ext->y1;
+            s->damage_x2 = ext->x2;
+            s->damage_y2 = ext->y2;
+            s->has_damage = 1;
+        }
+    }
+
+    struct wlr_buffer *buffer = state.buffer;
+
+    if (s->global_frame_count <= 5)
+        wlr_log(WLR_INFO, "output_frame %d: damage=%d force=%d buffer=%p", 
+                s->global_frame_count, s->has_damage, s->force_full_frame, (void*)buffer);
+
+    /* Skip if no damage and not forced */
+    if (!s->force_full_frame && !s->has_damage) {
+        finish_output_frame(s, so, &state);
+        return;
+    }
+
+    if (!buffer) {
+        wlr_log(WLR_ERROR, "output_frame %d: no buffer", s->global_frame_count);
+        finish_output_frame(s, so, &state);
+        return;
+    }
+
+    if (buffer->width != s->width || buffer->height != s->height) {
+        wlr_log(WLR_ERROR, "output_frame %d: bad buffer (buf=%p size=%dx%d expected %dx%d), skipping",
+                s->global_frame_count, (void*)buffer, 
+                buffer->width, buffer->height,
+                s->width, s->height);
+        finish_output_frame(s, so, &state);
+        return;
+    }
+
+    /* Copy pixels from buffer */
+    void *data_ptr = NULL;
+    uint32_t fmt = 0;
+    size_t stride = 0;
+
+    if (!wlr_buffer_begin_data_ptr_access(buffer, WLR_BUFFER_DATA_PTR_ACCESS_READ,
+                                          &data_ptr, &fmt, &stride)) {
+        wlr_log(WLR_ERROR, "output_frame %d: buffer access failed, skipping", s->global_frame_count);
+        finish_output_frame(s, so, &state);
+        return;
+    }
+
+    /* The nonzero scan only feeds the log line below, so do it only then. */
+    const int trace = s->global_frame_count <= 10;
+    if (trace)
+        wlr_log(WLR_INFO, "output_frame %d: stride=%zu width=%d (expected %d) fmt=0x%x", 
+                s->global_frame_count, stride, buffer->width, buffer->width * 4, fmt);
+
+    /* Always copy full buffer; rows are `stride` bytes apart in the source. */
+    int nonzero = 0;
+    for (int y = 0; y < buffer->height; y++) {
+        uint8_t *src = (uint8_t*)data_ptr + y * stride;
+        uint32_t *dst = &s->framebuf[y * s->width];
+        memcpy(dst, src, s->width * 4);
+        if (trace && !nonzero) {
+            for (int x = 0; x < s->width; x++) {
+                if (dst[x] != 0) { nonzero = 1; break; }
+            }
+        }
+    }
+    if (trace)
+        wlr_log(WLR_INFO, "output_frame %d: copied, nonzero=%d", s->global_frame_count, nonzero);
+    wlr_buffer_end_data_ptr_access(buffer);
+
+    finish_output_frame(s, so, &state);
+
+    send_frame(s);
+}
+
+/*
+ * Output "destroy" listener: unhook the frame and destroy listeners and
+ * forget the output.  Clearing s->output / s->scene_output keeps later code
+ * that tests them (do_resize(), output_frame()) from touching an output that
+ * no longer exists.
+ */
+static void output_destroy(struct wl_listener *listener, void *data) {
+    struct server *s = wl_container_of(listener, s, output_destroy);
+    wl_list_remove(&s->output_frame.link);
+    wl_list_remove(&s->output_destroy.link);
+    s->output = NULL;
+    s->scene_output = NULL;
+}
+
+/*
+ * Backend "new output" listener: initialise rendering for the output,
+ * enable it with a custom mode of the server's current size, add it to the
+ * layout and the scene, and subscribe to its frame and destroy events.
+ */
+static void new_output(struct wl_listener *l, void *d) {
+    struct wlr_output *output = d;
+    struct server *srv = wl_container_of(l, srv, new_output);
+
+    wlr_output_init_render(output, srv->allocator, srv->renderer);
+
+    /* Initial state: enabled, server-sized custom mode. */
+    struct wlr_output_state initial;
+    wlr_output_state_init(&initial);
+    wlr_output_state_set_custom_mode(&initial, srv->width, srv->height, 60000);
+    wlr_output_state_set_enabled(&initial, true);
+    wlr_output_commit_state(output, &initial);
+    wlr_output_state_finish(&initial);
+
+    wlr_output_layout_add_auto(srv->output_layout, output);
+    srv->output = output;
+    srv->scene_output = wlr_scene_output_create(srv->scene, output);
+
+    srv->output_frame.notify = output_frame;
+    srv->output_destroy.notify = output_destroy;
+    wl_signal_add(&output->events.frame, &srv->output_frame);
+    wl_signal_add(&output->events.destroy, &srv->output_destroy);
+
+    wlr_log(WLR_INFO, "Output ready: %dx%d", srv->width, srv->height);
+}
+
+/* Backend "new input" listener: pointer devices are attached to the cursor;
+ * every other device type is ignored. */
+static void new_input(struct wl_listener *l, void *d) {
+    struct wlr_input_device *device = d;
+
+    if (device->type != WLR_INPUT_DEVICE_POINTER)
+        return;
+
+    struct server *srv = wl_container_of(l, srv, new_input);
+    wlr_cursor_attach_input_device(srv->cursor, device);
+}
+
+/* Cleanup function for graceful shutdown */
+/* Release the dedup groups and every heap buffer owned by the server, then
+ * close the UDP socket if its descriptor is non-negative.  Wayland/wlroots
+ * objects are not touched here. */
+static void server_cleanup(struct server *s) {
+    free_tile_groups(&s->dedup);
+
+    void *heap_blocks[] = {
+        s->framebuf,
+        s->prev_framebuf,
+        s->tilebuf,
+        s->compbuf,
+        s->sendbuf,
+        s->pending_tiles,
+    };
+    for (size_t i = 0; i < sizeof(heap_blocks) / sizeof(heap_blocks[0]); i++)
+        free(heap_blocks[i]);
+
+    if (s->udp_fd >= 0)
+        close(s->udp_fd);
+}
+
+/*
+ * Entry point: p9wl <plan9-ip> [port]
+ *
+ * Opens the UDP socket to the Plan 9 draw server, asks it for the window
+ * size (falling back to 1024x768), allocates the frame and tile buffers,
+ * brings up a headless wlroots compositor using the pixman renderer with a
+ * single output of that size and a virtual keyboard, publishes the Wayland
+ * socket, and runs the event loop until the display stops.
+ *
+ * Returns 0 after a normal shutdown and 1 on a usage or initialisation
+ * error.
+ */
+int main(int argc, char *argv[]) {
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <plan9-ip> [port]\n", argv[0]);
+        return 1;
+    }
+
+    /* Validate the optional port before acquiring any resources. */
+    int port = 5556;
+    if (argc > 2) {
+        char *end = NULL;
+        long requested = strtol(argv[2], &end, 10);
+        if (end == argv[2] || *end != '\0' || requested < 1 || requested > 65535) {
+            fprintf(stderr, "Invalid port: %s\n", argv[2]);
+            return 1;
+        }
+        port = (int)requested;
+    }
+
+    signal(SIGPIPE, SIG_IGN);
+    wlr_log_init(WLR_INFO, NULL);
+
+    struct server s = {0};
+    wl_list_init(&s.toplevels);
+
+    /* UDP socket */
+    s.udp_fd = socket(AF_INET, SOCK_DGRAM, 0);
+    if (s.udp_fd < 0) { perror("socket"); return 1; }
+
+    s.p9_addr.sin_family = AF_INET;
+    s.p9_addr.sin_port = htons(port);
+    if (inet_pton(AF_INET, argv[1], &s.p9_addr.sin_addr) != 1) {
+        fprintf(stderr, "Invalid address: %s\n", argv[1]);
+        close(s.udp_fd);
+        return 1;
+    }
+
+    wlr_log(WLR_INFO, "Connecting to %s:%d", argv[1], port);
+
+    /* Get size from Plan 9 */
+    s.width = 1024; s.height = 768;
+    if (get_size(s.udp_fd, &s.p9_addr, &s.width, &s.height) < 0) {
+        wlr_log(WLR_ERROR, "Failed to get size, using %dx%d", s.width, s.height);
+    }
+    /* The size is used as a divisor and as an allocation size below. */
+    if (s.width <= 0 || s.height <= 0) {
+        s.width = 1024; s.height = 768;
+        wlr_log(WLR_ERROR, "Invalid size reported, using %dx%d", s.width, s.height);
+    }
+
+    s.tiles_x = (s.width + TILE_SIZE - 1) / TILE_SIZE;
+    s.tiles_y = (s.height + TILE_SIZE - 1) / TILE_SIZE;
+    int total_tiles = s.tiles_x * s.tiles_y;
+
+    s.framebuf = calloc((size_t)s.width * s.height, 4);
+    s.prev_framebuf = calloc((size_t)s.width * s.height, 4);
+    s.tilebuf = malloc(TILE_SIZE * TILE_SIZE * 4);
+    s.compbuf_size = TILE_SIZE * TILE_SIZE * 4 * 2;
+    s.compbuf = malloc(s.compbuf_size);
+    s.sendbuf = malloc(P9WL_MTU + 64);
+    s.pending_tiles = malloc(total_tiles * sizeof(int));
+
+    /* Check all allocations */
+    if (!s.framebuf || !s.prev_framebuf || !s.tilebuf || 
+        !s.compbuf || !s.sendbuf || !s.pending_tiles) {
+        wlr_log(WLR_ERROR, "Memory allocation failed");
+        server_cleanup(&s);
+        return 1;
+    }
+
+    s.num_pending = 0;
+    s.batch_active = 0;
+    s.force_full_frame = 1;
+    s.last_full_frame_ms = now_ms();
+    s.global_frame_count = 0;
+
+    /* Wayland - force Pixman renderer for buffer access */
+    setenv("WLR_RENDERER", "pixman", 1);
+    setenv("WLR_SCENE_DISABLE_DIRECT_SCANOUT", "1", 1);
+
+    s.display = wl_display_create();
+    if (!s.display) { wlr_log(WLR_ERROR, "Display failed"); server_cleanup(&s); return 1; }
+
+    s.backend = wlr_headless_backend_create(wl_display_get_event_loop(s.display));
+    if (!s.backend) { wlr_log(WLR_ERROR, "Backend failed"); server_cleanup(&s); return 1; }
+
+    s.renderer = wlr_renderer_autocreate(s.backend);
+    if (!s.renderer) { wlr_log(WLR_ERROR, "Renderer failed"); server_cleanup(&s); return 1; }
+    wlr_renderer_init_wl_display(s.renderer, s.display);
+
+    s.allocator = wlr_allocator_autocreate(s.backend, s.renderer);
+    if (!s.allocator) { wlr_log(WLR_ERROR, "Allocator failed"); server_cleanup(&s); return 1; }
+
+    wlr_compositor_create(s.display, 5, s.renderer);
+    wlr_subcompositor_create(s.display);
+    wlr_data_device_manager_create(s.display);
+
+    s.output_layout = wlr_output_layout_create(s.display);
+    s.scene = wlr_scene_create();
+    wlr_scene_attach_output_layout(s.scene, s.output_layout);
+
+    s.xdg_shell = wlr_xdg_shell_create(s.display, 3);
+    s.new_xdg_toplevel.notify = new_toplevel;
+    wl_signal_add(&s.xdg_shell->events.new_toplevel, &s.new_xdg_toplevel);
+
+    s.cursor = wlr_cursor_create();
+    wlr_cursor_attach_output_layout(s.cursor, s.output_layout);
+    s.cursor_mgr = wlr_xcursor_manager_create(NULL, 24);
+
+    s.seat = wlr_seat_create(s.display, "seat0");
+    wlr_seat_set_capabilities(s.seat, WL_SEAT_CAPABILITY_POINTER | WL_SEAT_CAPABILITY_KEYBOARD);
+
+    /* Virtual keyboard */
+    wlr_keyboard_init(&s.virtual_kb, NULL, "virtual-keyboard");
+    struct xkb_context *ctx = xkb_context_new(XKB_CONTEXT_NO_FLAGS);
+    struct xkb_keymap *km = ctx
+        ? xkb_keymap_new_from_names(ctx, NULL, XKB_KEYMAP_COMPILE_NO_FLAGS)
+        : NULL;
+    if (km) {
+        wlr_keyboard_set_keymap(&s.virtual_kb, km);
+    } else {
+        /* Keep running so the display still works; only key input is affected. */
+        wlr_log(WLR_ERROR, "Failed to compile XKB keymap, keyboard input unavailable");
+    }
+    /* Both unref calls accept NULL. */
+    xkb_keymap_unref(km);
+    xkb_context_unref(ctx);
+    wlr_seat_set_keyboard(s.seat, &s.virtual_kb);
+
+    s.new_output.notify = new_output;
+    wl_signal_add(&s.backend->events.new_output, &s.new_output);
+    s.new_input.notify = new_input;
+    wl_signal_add(&s.backend->events.new_input, &s.new_input);
+
+    wlr_headless_add_output(s.backend, s.width, s.height);
+
+    const char *sock = wl_display_add_socket_auto(s.display);
+    if (!sock) { wlr_log(WLR_ERROR, "Socket failed"); server_cleanup(&s); return 1; }
+
+    wlr_log(WLR_INFO, "WAYLAND_DISPLAY=%s (%dx%d)", sock, s.width, s.height);
+    setenv("WAYLAND_DISPLAY", sock, 1);
+
+    wl_event_loop_add_fd(wl_display_get_event_loop(s.display), s.udp_fd,
+                         WL_EVENT_READABLE, handle_udp, &s);
+
+    /* The batch timer is re-armed through wl_event_source_timer_update(). */
+    s.batch_timer = wl_event_loop_add_timer(wl_display_get_event_loop(s.display),
+                                            batch_timer_cb, &s);
+    if (!s.batch_timer) { wlr_log(WLR_ERROR, "Timer failed"); server_cleanup(&s); return 1; }
+
+    if (!wlr_backend_start(s.backend)) { wlr_log(WLR_ERROR, "Start failed"); server_cleanup(&s); return 1; }
+
+    wlr_log(WLR_INFO, "Running");
+    wl_display_run(s.display);
+
+    wl_display_destroy(s.display);
+    server_cleanup(&s);
+    return 0;
+}
--- /dev/null
+++ b/plan9/drawserv.c
@@ -1,0 +1,837 @@
+/*
+ * drawserv_udp.c - Plan 9 draw server with UDP (9front)
+ *
+ * Based on working drawserv.c (TCP version), converted to UDP.
+ * Receives LZ4 compressed tiles, NO XOR delta.
+ * 
+ * Modified: Accumulate tiles until FLUSH, keeping only most recent frame per tile.
+ *
+ * Build:
+ *   6c drawserv_udp.c lz4.c
+ *   6l -o drawserv_udp drawserv_udp.6 lz4.6
+ */
+
+#include <u.h>
+#include <libc.h>
+#include <draw.h>
+#include <thread.h>
+#include <mouse.h>
+#include <keyboard.h>
+#include "lz4.h"
+
+/*
+ * Protocol
+ *
+ * Every packet starts with a P9WL_HDR_SIZE-byte header (field offsets are
+ * the HDR_* values below); type-specific fields follow from offset 20.
+ * The magic value spells "P9WI" when its bytes are read from the most
+ * significant one down.
+ */
+#define P9WL_MAGIC        0x50395749
+#define P9WL_VERSION      1
+#define P9WL_HDR_SIZE     20
+#define P9WL_MTU          1400
+/* Byte stride between consecutive chunks of a multi-chunk tile (see add_chunk) */
+#define P9WL_MAX_CHUNK    (P9WL_MTU - 44)
+
+/* Packet types (header byte HDR_TYPE) */
+#define P9WL_SIZE_REQ     0x01
+#define P9WL_SIZE_RESP    0x02
+#define P9WL_TILE         0x10
+#define P9WL_FLUSH        0x15
+#define P9WL_NACK         0x16
+#define P9WL_SCROLL       0x17
+#define P9WL_MOUSE        0x20
+#define P9WL_KEY          0x21
+#define P9WL_RESIZE       0x22
+
+/* Bits of the header byte HDR_FLAGS, as tested in handle_tile */
+#define P9WL_FLAG_COMPRESSED  0x01
+#define P9WL_FLAG_HAS_COPIES  0x02
+
+/* Byte offsets of the common header fields */
+#define HDR_MAGIC     0
+#define HDR_VERSION   4
+#define HDR_TYPE      5
+#define HDR_FLAGS     6
+#define HDR_SCROLL_ID 7
+#define HDR_SEQ       8
+#define HDR_FRAME     12
+#define HDR_TIMESTAMP 16
+
+/* Little-endian accessors: byte 0 is the least significant byte */
+#define GET16(p) ((uint)(p)[0] | ((uint)(p)[1]<<8))
+#define GET32(p) ((ulong)(p)[0] | ((ulong)(p)[1]<<8) | ((ulong)(p)[2]<<16) | ((ulong)(p)[3]<<24))
+#define PUT16(p, v) do { (p)[0]=(uchar)(v); (p)[1]=(uchar)((v)>>8); } while(0)
+#define PUT32(p, v) do { (p)[0]=(uchar)(v); (p)[1]=(uchar)((v)>>8); (p)[2]=(uchar)((v)>>16); (p)[3]=(uchar)((v)>>24); } while(0)
+
+/* Mouse and keyboard controllers (types from mouse.h / keyboard.h) */
+Mousectl *mctl;
+Keyboardctl *kctl;
+/* NOTE(review): presumably the libthread main-thread stack size knob - confirm against thread(2) */
+int mainstacksize = 32768;
+
+/* UDP */
+int udpdata = -1;	/* data fd written by udpsend; negative means not open */
+uchar clienthdr[52];	/* 52-byte header copied from the first valid packet and prepended to every outgoing packet */
+int clientknown = 0;	/* set once clienthdr has been filled in */
+Lock udplock;		/* taken around updates of clienthdr and around writes in udpsend */
+ulong sendseq = 0;	/* sequence number stamped into outgoing packets */
+int debug = 0;		/* non-zero enables the diagnostic fprint calls */
+
+/* Display */
+Image *tileimg;		/* scratch image tiles are loaded into before being drawn (see flush_accum) */
+int tileimgw, tileimgh;	/* dimensions tileimg was last allocated with */
+
+/* Packet channel */
+/* NOTE(review): hdr is presumably the 52-byte UDP header and data the payload of one datagram - confirm in netproc */
+typedef struct Packet Packet;
+struct Packet {
+	uchar hdr[52];
+	uchar data[2048];
+	int len;
+};
+Channel *pktch;
+
+/*
+ * Tile reassembly for multi-chunk tiles
+ *
+ * One slot per tile that is still missing chunks.  A slot is considered
+ * free when data is nil (see get_pending).
+ */
+typedef struct Pending Pending;
+struct Pending {
+	int x, y, w, h;		/* tile rectangle; together with frame this identifies the slot */
+	int flags;		/* header flags passed in when the slot was created */
+	int chunk_count;	/* number of chunks expected */
+	int total_size;		/* size in bytes of the reassembled payload */
+	uchar *data;		/* reassembly buffer, total_size bytes */
+	uchar *got;		/* one byte per chunk, non-zero once that chunk has been stored */
+	int ngot;		/* number of chunks stored so far */
+	ulong frame;		/* frame number the tile belongs to */
+	vlong when;		/* nsec() at slot creation; the smallest value is evicted when no slot is free */
+};
+
+#define MAXPENDING 64
+Pending pending[MAXPENDING];
+Lock pendlock;		/* taken while the pending table is searched or modified */
+
+/* Upper bound on copy destinations parsed from a single tile packet */
+#define MAX_COPIES 512
+
+/* Drop counters for diagnostics */
+long drop_pktch = 0;
+long tiles_drawn = 0;		/* tiles drawn by flush_accum */
+long tiles_failed = 0;		/* tiles dropped in handle_tile (decode failure or no buffer) */
+long tiles_superseded = 0;	/* tiles rejected by accum_tile because a newer frame was already stored */
+ulong last_stats = 0;
+
+/* Frame tile counting */
+ulong current_frame = 0;
+
+/* Tile tracking */
+/* Tile edge in pixels and the dimensions of the accumulation grid (see tile_index) */
+#define TILE_SIZE 16
+#define MAX_TILES_X 256
+#define MAX_TILES_Y 256
+
+/* Inline decompression buffer */
+/* Grown on demand in handle_tile; holds the decoded pixels of the tile being processed */
+uchar *inline_decbuf = nil;
+int inline_decbuf_size = 0;
+
+/*
+ * Tile accumulation buffer
+ * 
+ * Tiles are buffered here until FLUSH. Each slot holds decompressed
+ * pixels for one tile position. If multiple packets arrive for the
+ * same tile, only the one with the highest frame number is kept
+ * (a packet with an equal frame number replaces the stored one).
+ *
+ * Slots are indexed by tile_index(x, y), so there is one slot per
+ * TILE_SIZE x TILE_SIZE cell of a MAX_TILES_X x MAX_TILES_Y grid.
+ */
+typedef struct AccumTile AccumTile;
+struct AccumTile {
+	int valid;          /* Non-zero while the slot holds a tile that has not been flushed */
+	ulong frame;        /* Frame number of the stored tile */
+	int x, y, w, h;     /* Position relative to the window origin, and size in pixels */
+	uchar *pixels;      /* Decompressed XBGR, w*h*4 bytes */
+	int pixelsize;      /* Allocated size of pixels buffer */
+};
+
+#define MAX_ACCUM (MAX_TILES_X * MAX_TILES_Y)
+AccumTile accum[MAX_ACCUM];
+
+/*
+ * Map a pixel position to its slot in accum[].
+ * Returns the slot index, or -1 when the position is negative or lies
+ * outside the MAX_TILES_X x MAX_TILES_Y grid.
+ */
+int
+tile_index(int x, int y)
+{
+	int tx, ty;
+
+	/*
+	 * Test the pixel coordinates, not the quotients: integer division
+	 * truncates toward zero, so -15..-1 would otherwise land in tile 0.
+	 */
+	if(x < 0 || y < 0)
+		return -1;
+
+	tx = x / TILE_SIZE;
+	ty = y / TILE_SIZE;
+	if(tx >= MAX_TILES_X || ty >= MAX_TILES_Y)
+		return -1;
+	return ty * MAX_TILES_X + tx;
+}
+
+/*
+ * Store a decompressed tile in the accumulation buffer.
+ *
+ * pixels must hold w*h*4 bytes; they are copied, so the caller keeps
+ * ownership of its buffer.
+ *
+ * Returns 1 if stored.  Returns 0 if the position is outside the grid,
+ * the size is not positive or exceeds the grid, a newer frame is already
+ * stored for this slot (counted in tiles_superseded), or memory could
+ * not be allocated.
+ */
+int
+accum_tile(int x, int y, int w, int h, ulong frame, uchar *pixels)
+{
+	int idx, nbytes;
+	AccumTile *t;
+
+	/* Bounding w and h by the grid also keeps w*h*4 within int range */
+	if(w <= 0 || h <= 0 || w > MAX_TILES_X * TILE_SIZE || h > MAX_TILES_Y * TILE_SIZE)
+		return 0;
+
+	idx = tile_index(x, y);
+	if(idx < 0)
+		return 0;
+
+	t = &accum[idx];
+
+	/* Only update if this frame is newer or equal */
+	if(t->valid && frame < t->frame){
+		tiles_superseded++;
+		return 0;
+	}
+
+	nbytes = w * h * 4;
+
+	/* Allocate or grow pixel buffer if needed */
+	if(t->pixels == nil || t->pixelsize < nbytes){
+		free(t->pixels);
+		t->pixels = malloc(nbytes);
+		if(t->pixels == nil){
+			/*
+			 * The old contents are gone; mark the slot empty so
+			 * flush_accum never loads from a nil buffer.
+			 */
+			t->pixelsize = 0;
+			t->valid = 0;
+			return 0;
+		}
+		t->pixelsize = nbytes;
+	}
+
+	memmove(t->pixels, pixels, nbytes);
+	t->x = x;
+	t->y = y;
+	t->w = w;
+	t->h = h;
+	t->frame = frame;
+	t->valid = 1;
+	return 1;
+}
+
+/*
+ * Make sure the scratch image tileimg can hold a w x h tile, replacing it
+ * with one whose sides are rounded up to a multiple of 64 when it cannot.
+ * Returns 1 when tileimg is usable, 0 when allocation failed.
+ */
+static int
+ensure_tileimg(int w, int h)
+{
+	if(tileimg != nil && w <= tileimgw && h <= tileimgh)
+		return 1;
+
+	if(tileimg)
+		freeimage(tileimg);
+	tileimgw = (w + 63) & ~63;
+	tileimgh = (h + 63) & ~63;
+	tileimg = allocimage(display, Rect(0, 0, tileimgw, tileimgh), XBGR32, 0, DNofill);
+	if(tileimg == nil){
+		fprint(2, "allocimage %dx%d failed: %r\n", tileimgw, tileimgh);
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * Draw every accumulated tile to the screen and empty the buffer.
+ * Each valid slot is loaded into tileimg and drawn at its position
+ * relative to screen->r.min.  A slot is cleared whether or not it could
+ * be drawn; only drawn tiles are added to tiles_drawn.  The display is
+ * flushed once at the end.
+ */
+void
+flush_accum(void)
+{
+	AccumTile *slot;
+	Rectangle src, where;
+	int ndrawn, ok;
+
+	ndrawn = 0;
+	for(slot = accum; slot < accum + MAX_ACCUM; slot++){
+		if(!slot->valid)
+			continue;
+
+		src = Rect(0, 0, slot->w, slot->h);
+		ok = ensure_tileimg(slot->w, slot->h);
+		if(ok && loadimage(tileimg, src, slot->pixels, slot->w * slot->h * 4) < 0){
+			fprint(2, "loadimage failed: %r\n");
+			ok = 0;
+		}
+		if(ok){
+			where.min = addpt(screen->r.min, Pt(slot->x, slot->y));
+			where.max = addpt(where.min, Pt(slot->w, slot->h));
+			draw(screen, where, tileimg, nil, src.min);
+			ndrawn++;
+		}
+		slot->valid = 0;
+	}
+
+	tiles_drawn += ndrawn;
+	flushimage(display, 1);
+}
+
+/* Current nsec() clock reading converted to milliseconds. */
+ulong
+now_ms(void)
+{
+	vlong ms;
+
+	ms = nsec() / 1000000;
+	return ms;
+}
+
+/*
+ * Send len bytes of pkt to the client: the saved 52-byte client header
+ * is prepended and the result written to udpdata under udplock.
+ * Nothing is sent while the socket is closed or no client is known, and
+ * payloads that do not fit the staging buffer are dropped silently.
+ */
+void
+udpsend(uchar *pkt, int len)
+{
+	uchar out[2048];
+
+	if(udpdata < 0 || !clientknown)
+		return;
+
+	lock(&udplock);
+	if(len + 52 > sizeof(out)){
+		unlock(&udplock);
+		return;
+	}
+	memmove(out, clienthdr, 52);
+	memmove(out + 52, pkt, len);
+	write(udpdata, out, 52 + len);
+	unlock(&udplock);
+}
+
+/*
+ * Fill in the 20-byte common header of an outgoing packet of the given
+ * type: magic, version, zero flags and scroll id, the next send sequence
+ * number, frame 0 and the current time in milliseconds.
+ */
+static void
+put_header(uchar *msg, int type)
+{
+	PUT32(msg + HDR_MAGIC, P9WL_MAGIC);
+	msg[HDR_VERSION] = P9WL_VERSION;
+	msg[HDR_TYPE] = type;
+	msg[HDR_FLAGS] = 0;
+	msg[7] = 0;
+	PUT32(msg + HDR_SEQ, sendseq++);
+	PUT32(msg + HDR_FRAME, 0);
+	PUT32(msg + HDR_TIMESTAMP, now_ms());
+}
+
+/* Reply to a SIZE_REQ with the current width and height of screen->r. */
+void
+send_size_resp(void)
+{
+	uchar msg[24];
+
+	put_header(msg, P9WL_SIZE_RESP);
+	PUT16(msg + 20, Dx(screen->r));
+	PUT16(msg + 22, Dy(screen->r));
+
+	udpsend(msg, 24);
+}
+
+/* Report a mouse position and button mask; a no-op until a client is known. */
+void
+send_mouse(int x, int y, int b)
+{
+	uchar msg[28];
+
+	if(!clientknown)
+		return;
+
+	put_header(msg, P9WL_MOUSE);
+	PUT16(msg + 20, x);
+	PUT16(msg + 22, y);
+	msg[24] = b;
+	msg[25] = 0;
+	msg[26] = 0;
+	msg[27] = 0;
+
+	udpsend(msg, 28);
+}
+
+/* Report one typed rune; a no-op until a client is known. */
+void
+send_key(Rune k)
+{
+	uchar msg[28];
+
+	if(!clientknown)
+		return;
+
+	put_header(msg, P9WL_KEY);
+	PUT32(msg + 20, k);
+	msg[24] = 1;
+	msg[25] = 0;
+	msg[26] = 0;
+	msg[27] = 0;
+
+	udpsend(msg, 28);
+}
+
+/*
+ * Announce a new window size.  The same packet is sent five times, with
+ * a 20 ms sleep after each send.  A no-op until a client is known.
+ */
+void
+send_resize(int w, int h)
+{
+	uchar msg[24];
+	int left;
+
+	if(!clientknown)
+		return;
+
+	put_header(msg, P9WL_RESIZE);
+	PUT16(msg + 20, w);
+	PUT16(msg + 22, h);
+
+	for(left = 5; left > 0; left--){
+		udpsend(msg, 24);
+		sleep(20);
+	}
+}
+
+/*
+ * Find or create the reassembly slot for a multi-chunk tile.
+ *
+ * A slot matches when x, y, w, h and frame are all equal.  When there is
+ * no match, a free slot (data == nil) is used, or failing that the slot
+ * with the smallest creation time is evicted and reused.
+ *
+ * chunk_count and total_size come from the network: non-positive values
+ * are rejected.  Returns nil in that case and when the slot's buffers
+ * cannot be allocated; the slot is then left free rather than
+ * half-initialised.
+ */
+Pending*
+get_pending(int x, int y, int w, int h, int chunk_count, int total_size, int flags, ulong frame)
+{
+	Pending *p, *oldest;
+	vlong now;
+	int i;
+
+	if(chunk_count <= 0 || total_size <= 0)
+		return nil;
+
+	now = nsec();
+	lock(&pendlock);
+
+	/* Find existing */
+	for(i = 0; i < MAXPENDING; i++){
+		p = &pending[i];
+		if(p->data && p->x == x && p->y == y && p->w == w && p->h == h && p->frame == frame){
+			unlock(&pendlock);
+			return p;
+		}
+	}
+
+	/* Find free slot (or oldest) */
+	oldest = &pending[0];
+	for(i = 0; i < MAXPENDING; i++){
+		p = &pending[i];
+		if(p->data == nil){
+			oldest = p;
+			break;
+		}
+		if(p->when < oldest->when)
+			oldest = p;
+	}
+
+	/* Drop whatever the chosen slot still holds */
+	p = oldest;
+	free(p->data);
+	free(p->got);
+
+	p->data = malloc(total_size);
+	p->got = malloc(chunk_count);
+	if(p->data == nil || p->got == nil){
+		free(p->data);
+		free(p->got);
+		p->data = nil;
+		p->got = nil;
+		unlock(&pendlock);
+		return nil;
+	}
+	memset(p->got, 0, chunk_count);
+
+	p->x = x;
+	p->y = y;
+	p->w = w;
+	p->h = h;
+	p->flags = flags;
+	p->chunk_count = chunk_count;
+	p->total_size = total_size;
+	p->ngot = 0;
+	p->frame = frame;
+	p->when = now;
+
+	unlock(&pendlock);
+	return p;
+}
+
+/*
+ * Store chunk idx of a multi-chunk tile in p's reassembly buffer.
+ *
+ * Chunk idx is placed at byte offset idx * P9WL_MAX_CHUNK, and a chunk
+ * that would run past total_size is truncated to fit.  Duplicate chunks,
+ * chunks with a negative length and chunks whose offset lies beyond the
+ * buffer are ignored.
+ *
+ * Returns 1 when this chunk completed the tile, 0 otherwise.
+ */
+int
+add_chunk(Pending *p, int idx, uchar *data, int len)
+{
+	int offset;
+
+	if(p == nil || p->data == nil || p->got == nil)
+		return 0;
+	if(idx < 0 || idx >= p->chunk_count)
+		return 0;
+	if(p->got[idx])
+		return 0;
+	if(len < 0)
+		return 0;
+
+	offset = idx * P9WL_MAX_CHUNK;
+	/* Without this check the clamp below would yield a negative length */
+	if(offset > p->total_size)
+		return 0;
+	if(len > p->total_size - offset)
+		len = p->total_size - offset;
+
+	memmove(p->data + offset, data, len);
+	p->got[idx] = 1;
+	p->ngot++;
+
+	return p->ngot >= p->chunk_count;
+}
+
+/*
+ * Decode srclen bytes of tile payload into inline_decbuf, growing the
+ * buffer when it is too small.  The payload is LZ4-decompressed when
+ * flags has P9WL_FLAG_COMPRESSED set and copied verbatim otherwise.
+ * Returns 1 when exactly nbytes of pixels were produced, 0 otherwise.
+ */
+static int
+decode_tile(uchar *src, int srclen, int flags, int nbytes)
+{
+	/* Ensure decode buffer is large enough */
+	if(nbytes > inline_decbuf_size){
+		free(inline_decbuf);
+		inline_decbuf = malloc(nbytes);
+		inline_decbuf_size = inline_decbuf ? nbytes : 0;
+	}
+	if(inline_decbuf == nil)
+		return 0;
+
+	if(flags & P9WL_FLAG_COMPRESSED)
+		return LZ4_decompress_safe((char*)src, (char*)inline_decbuf, srclen, nbytes) == nbytes;
+
+	if(srclen != nbytes)
+		return 0;
+	memmove(inline_decbuf, src, nbytes);
+	return 1;
+}
+
+/*
+ * Handle one TILE packet of len bytes.
+ *
+ * Layout after the common header: x, y, w, h, chunk index and chunk
+ * count as 16-bit values at offsets 20..30, total size and chunk size as
+ * 32-bit values at 32 and 36, payload from offset 40.  With
+ * P9WL_FLAG_HAS_COPIES a 16-bit count and that many (x, y) pairs follow
+ * the payload of a single-chunk tile.
+ *
+ * All of these fields come from the network, so they are range-checked
+ * before being used as sizes or offsets; packets that fail a check are
+ * dropped.  Single-chunk tiles are decoded directly; multi-chunk tiles
+ * are reassembled through the pending table and decoded once the last
+ * chunk has arrived.  Decoded tiles (and their copies) go to accum_tile.
+ * Decode failures are counted in tiles_failed.
+ */
+void
+handle_tile(uchar *pkt, int len, int flags, ulong frame)
+{
+	int x, y, w, h;
+	int chunk_idx, chunk_count;
+	int total_size, chunk_size;
+	int nbytes, nc, num_copies, copy_offset, i;
+	int copy_dx[MAX_COPIES], copy_dy[MAX_COPIES];
+	int tilesize, tileflags;
+	uchar *data, *tiledata;
+	Pending *p;
+
+	if(len < 40)
+		return;
+
+	x = GET16(pkt + 20);
+	y = GET16(pkt + 22);
+	w = GET16(pkt + 24);
+	h = GET16(pkt + 26);
+	chunk_idx = GET16(pkt + 28);
+	chunk_count = GET16(pkt + 30);
+	total_size = GET32(pkt + 32);
+	chunk_size = GET32(pkt + 36);
+
+	/* Bounding w and h by the accumulation grid keeps w*h*4 within int range */
+	if(w <= 0 || h <= 0 || w > MAX_TILES_X * TILE_SIZE || h > MAX_TILES_Y * TILE_SIZE)
+		return;
+	if(chunk_count < 1 || chunk_idx >= chunk_count)
+		return;
+	/* chunk_size is a 32-bit wire value and may read back negative */
+	if(chunk_size < 0 || chunk_size > len - 40)
+		return;
+
+	data = pkt + 40;
+	nbytes = w * h * 4;
+
+	/* Single chunk - decompress and accumulate */
+	if(chunk_count == 1){
+		nc = 0;
+
+		/* Parse copy destinations first */
+		if(flags & P9WL_FLAG_HAS_COPIES){
+			copy_offset = 40 + chunk_size;
+			if(len >= copy_offset + 2){
+				num_copies = GET16(pkt + copy_offset);
+				if(num_copies > MAX_COPIES) num_copies = MAX_COPIES;
+				/* Stop early if the packet is shorter than the count claims */
+				for(i = 0; i < num_copies && copy_offset + 2 + (i+1)*4 <= len; i++){
+					copy_dx[i] = GET16(pkt + copy_offset + 2 + i*4);
+					copy_dy[i] = GET16(pkt + copy_offset + 2 + i*4 + 2);
+				}
+				nc = i;
+			}
+		}
+
+		if(!decode_tile(data, chunk_size, flags, nbytes)){
+			tiles_failed++;
+			return;
+		}
+
+		/* Accumulate primary tile */
+		accum_tile(x, y, w, h, frame, inline_decbuf);
+
+		/* Accumulate copies (same pixels, different positions) */
+		for(i = 0; i < nc; i++)
+			accum_tile(copy_dx[i], copy_dy[i], w, h, frame, inline_decbuf);
+		return;
+	}
+
+	/*
+	 * Multi-chunk - reassemble first.
+	 * Chunk i is stored at offset i * P9WL_MAX_CHUNK, so a consistent
+	 * total_size lies between the start of the last chunk and the end
+	 * of the space all chunks can cover.
+	 */
+	if(total_size < (chunk_count - 1) * P9WL_MAX_CHUNK || total_size > chunk_count * P9WL_MAX_CHUNK)
+		return;
+
+	p = get_pending(x, y, w, h, chunk_count, total_size, flags, frame);
+	if(p == nil)
+		return;
+
+	if(!add_chunk(p, chunk_idx, data, chunk_size))
+		return;
+
+	/* Complete - take the payload out of the slot, which frees the slot */
+	tiledata = p->data;
+	tilesize = p->total_size;
+	tileflags = p->flags;
+
+	lock(&pendlock);
+	free(p->got);
+	p->got = nil;
+	p->data = nil;
+	unlock(&pendlock);
+
+	if(!decode_tile(tiledata, tilesize, tileflags, nbytes)){
+		tiles_failed++;
+		free(tiledata);
+		return;
+	}
+	free(tiledata);
+
+	/* Accumulate tile */
+	accum_tile(x, y, w, h, frame, inline_decbuf);
+}
+
+void
+handle_packet(uchar *pkt, int len, uchar *hdr)
+{
+	/* Sequence statistics persist across calls */
+	static ulong last_seq = 0;
+	static int seq_init = 0;
+	static long total_gaps = 0;
+	static long total_pkts = 0;
+	ulong magic, frame, seq;
+	int type, flags;
+	long gap;
+
+	/*
+	 * Validate one received packet and act on it.  Anything too short
+	 * for a header, without the protocol magic, or of another protocol
+	 * version is silently dropped.
+	 */
+	if(len < P9WL_HDR_SIZE)
+		return;
+	magic = GET32(pkt + HDR_MAGIC);
+	if(magic != P9WL_MAGIC || pkt[HDR_VERSION] != P9WL_VERSION)
+		return;
+
+	/*
+	 * The first valid packet identifies the client: its 52-byte header
+	 * is kept in clienthdr (under udplock); later ones do not update it.
+	 */
+	if(clientknown == 0){
+		lock(&udplock);
+		memmove(clienthdr, hdr, 52);
+		clientknown = 1;
+		unlock(&udplock);
+		if(debug)
+			fprint(2, "client connected\n");
+	}
+
+	/* Remaining header fields */
+	seq = GET32(pkt + HDR_SEQ);
+	frame = GET32(pkt + HDR_FRAME);
+	flags = pkt[HDR_FLAGS];
+	type = pkt[HDR_TYPE];
+
+	/*
+	 * Loss accounting: only a forward jump of more than one counts
+	 * as a gap; repeated or older sequence numbers are not reported.
+	 */
+	total_pkts++;
+	if(seq_init && seq > last_seq && seq != last_seq + 1){
+		gap = seq - last_seq - 1;
+		total_gaps += gap;
+		if(debug)
+			fprint(2, "SEQ GAP: expected %uld got %uld (lost %ld, total lost %ld/%ld)\n",
+			       last_seq + 1, seq, gap, total_gaps, total_pkts);
+	}
+	seq_init = 1;
+	last_seq = seq;
+
+	/* Dispatch on type; P9WL_SCROLL and unknown types are ignored */
+	if(type == P9WL_SIZE_REQ)
+		send_size_resp();
+	else if(type == P9WL_TILE)
+		handle_tile(pkt, len, flags, frame);
+	else if(type == P9WL_FLUSH)
+		flush_accum();	/* draw all accumulated tiles */
+}
+
+void
+netproc(void *arg)
+{
+	/* Runs as its own proc: announce UDP port arg, then queue datagrams on pktch */
+	char *port = arg;
+	char buf[64], ldir[40], ctlpath[40];
+	int n, acfd, ctlfd;
+	Packet *p;
+
+	/*
+	 * Packet ring: two slots more than the 8192 entries pktch can hold, so
+	 * the slot being filled is never one that is queued or that the receiver
+	 * is still handling (assumes it handles one packet at a time).
+	 */
+	#define PACKET_POOL_SIZE (8192 + 2)
+	static Packet packet_pool[PACKET_POOL_SIZE];
+	static int pool_idx = 0;
+
+	acfd = open("/net/udp/clone", ORDWR);
+	if(acfd < 0){
+		fprint(2, "open /net/udp/clone: %r\n");
+		threadexitsall("udp");
+	}
+	n = read(acfd, buf, sizeof(buf)-1);
+	if(n <= 0){
+		fprint(2, "read clone: %r\n");
+		threadexitsall("udp");
+	}
+	buf[n] = 0;
+
+	/* Try to increase UDP receive buffer; failure is ignored */
+	snprint(ctlpath, sizeof(ctlpath), "/net/udp/%s/ctl", buf);
+	ctlfd = open(ctlpath, OWRITE);
+	if(ctlfd >= 0){
+		fprint(ctlfd, "rcvbuf 4194304");
+		close(ctlfd);
+	}
+	snprint(ldir, sizeof(ldir), "/net/udp/%s/data", buf);
+	if(fprint(acfd, "headers") < 0){
+		fprint(2, "headers: %r\n");
+		threadexitsall("udp");
+	}
+	if(fprint(acfd, "announce %s", port) < 0){
+		fprint(2, "announce %s: %r\n", port);
+		threadexitsall("udp");
+	}
+
+	udpdata = open(ldir, ORDWR);
+	if(udpdata < 0){
+		fprint(2, "open %s: %r\n", ldir);
+		threadexitsall("udp");
+	}
+	if(debug)
+		fprint(2, "listening on UDP port %s\n", port);
+
+	for(;;){
+		p = &packet_pool[pool_idx];
+		n = read(udpdata, p->data, sizeof(p->data));
+		if(n <= 52)
+			continue;	/* error or no payload: refill the same slot */
+
+		memmove(p->hdr, p->data, 52);
+		p->len = n - 52;
+		memmove(p->data, p->data + 52, p->len);
+		/* Move on only if queued; a dropped packet's slot is reused */
+		if(nbsend(pktch, &p) > 0)
+			pool_idx = (pool_idx + 1) % PACKET_POOL_SIZE;
+		else
+			drop_pktch++;
+	}
+}
+
+void
+threadmain(int argc, char *argv[])
+{
+	char *port;
+	Mouse m;
+	Rune k;
+	Packet *pkt;
+	Alt alts[5];
+	int i;
+
+	/* Main thread: set up display and input, start netproc, run the event loop */
+	port = "5556";
+	if(argc > 1)
+		port = argv[1];
+
+	if(initdraw(nil, nil, "drawserv") < 0)
+		sysfatal("initdraw: %r");
+
+	mctl = initmouse(nil, screen);
+	if(mctl == nil)
+		sysfatal("initmouse: %r");
+
+	kctl = initkeyboard(nil);
+	if(kctl == nil)
+		sysfatal("initkeyboard: %r");
+
+	/* Checked like the other init calls: a nil channel would fault on first use */
+	pktch = chancreate(sizeof(Packet*), 8192);
+	if(pktch == nil)
+		sysfatal("chancreate: %r");
+
+	for(i = 0; i < MAXPENDING; i++){
+		pending[i].data = nil;
+		pending[i].got = nil;
+	}
+
+	/* Initialize accumulation buffer */
+	for(i = 0; i < MAX_ACCUM; i++){
+		accum[i].valid = 0;
+		accum[i].pixels = nil;
+		accum[i].pixelsize = 0;
+	}
+
+	draw(screen, screen->r, display->black, nil, ZP);
+	flushimage(display, 1);
+	if(debug)
+		fprint(2, "drawserv_udp %dx%d\n", Dx(screen->r), Dy(screen->r));
+
+	/* netproc runs in its own proc and queues received packets on pktch */
+	proccreate(netproc, port, 32768);
+
+	/* Event sources; the indices match the switch cases below */
+	alts[0].c = mctl->c;
+	alts[0].v = &m;
+	alts[0].op = CHANRCV;
+	alts[1].c = mctl->resizec;
+	alts[1].v = nil;
+	alts[1].op = CHANRCV;
+	alts[2].c = kctl->c;
+	alts[2].v = &k;
+	alts[2].op = CHANRCV;
+	alts[3].c = pktch;
+	alts[3].v = &pkt;
+	alts[3].op = CHANRCV;
+	alts[4].op = CHANEND;
+
+	last_stats = now_ms();
+
+	for(;;){
+		/* Print stats every 5 seconds */
+		ulong now = now_ms();
+		if(now - last_stats > 5000){
+			if(debug && (drop_pktch || tiles_superseded))
+				fprint(2, "stats: drops=%ld superseded=%ld drawn=%ld\n",
+				       drop_pktch, tiles_superseded, tiles_drawn);
+			drop_pktch = 0;
+			tiles_superseded = 0;
+			tiles_drawn = 0;
+			last_stats = now;
+		}
+
+		switch(alt(alts)){
+		case 0:  /* mouse */
+			send_mouse(m.xy.x - screen->r.min.x,
+			          m.xy.y - screen->r.min.y,
+			          m.buttons);
+			break;
+
+		case 1:  /* resize */
+			if(getwindow(display, Refnone) < 0)
+				sysfatal("getwindow: %r");
+
+			if(tileimg){
+				freeimage(tileimg);
+				tileimg = nil;
+				tileimgw = tileimgh = 0;
+			}
+
+			draw(screen, screen->r, display->black, nil, ZP);
+			flushimage(display, 1);
+			if(debug)
+				fprint(2, "resize: %dx%d\n", Dx(screen->r), Dy(screen->r));
+			send_resize(Dx(screen->r), Dy(screen->r));
+			break;
+
+		case 2:  /* keyboard */
+			if(k == Kdel)
+				threadexitsall(nil);
+			send_key(k);
+			break;
+
+		case 3:  /* network packet */
+			handle_packet(pkt->data, pkt->len, pkt->hdr);
+			/* Drain ALL pending packets before checking other channels */
+			while(nbrecv(pktch, &pkt) > 0){
+				handle_packet(pkt->data, pkt->len, pkt->hdr);
+			}
+			break;
+		}
+	}
+}
--- /dev/null
+++ b/plan9/lz4.c
@@ -1,0 +1,154 @@
+/*
+ * lz4.c - Minimal LZ4 block decompressor for Plan 9
+ *
+ * This implements only LZ4_decompress_safe() which is all drawserv needs.
+ * The Linux side uses the real liblz4 for compression.
+ *
+ * LZ4 block format is simple:
+ * - Sequences of: [token] [extra literal len...] [literals...] [offset] [extra match len...]
+ * - Token high nibble: literal length (15 = more bytes follow)
+ * - Token low nibble: match length - 4 (15 = more bytes follow)
+ * - Offset: 2 bytes little-endian, distance back to copy from
+ *
+ * Public domain / Unlicense - use freely
+ */
+
+#include <u.h>
+#include <libc.h>
+#include "lz4.h"
+
+#define LZ4_MIN_MATCH 4
+
+/*
+ * Decode an LZ4 length field: a nibble of 15 is extended by the bytes that
+ * follow, up to and including the first one below 255; -1 if input runs out.
+ */
+static int
+readvlen(uchar **pp, uchar *end, int initial)
+{
+	uchar *cur;
+	int b, total = initial;
+
+	if(initial != 15)
+		return initial;
+	for(cur = *pp; cur < end; ){
+		b = *cur++;
+		total += b;
+		if(b != 255){
+			*pp = cur;
+			return total;
+		}
+	}
+	return -1;
+}
+
+/*
+ * LZ4_decompress_safe - decompress LZ4 block with bounds checking
+ *
+ * src: compressed data
+ * dst: output buffer (must be pre-allocated)
+ * compressedSize: exact size of compressed data
+ * dstCapacity: size of output buffer
+ *
+ * Returns: number of bytes decompressed, or negative on error
+ *
+ * Errors: a non-positive size; input that ends where a token, length
+ * byte or offset is expected; literals that overrun the input or dst;
+ * a match that overruns dst, or whose offset is 0 or reaches before dst.
+ *
+ * Every length taken from the input is compared with the bytes that
+ * remain (oend - op, iend - ip) before op or ip is advanced, so a
+ * corrupt block cannot move either pointer out of its buffer.
+ * Decoding stops normally when the input ends right after literals.
+ */
+int
+LZ4_decompress_safe(char *src, char *dst, int compressedSize, int dstCapacity)
+{
+	uchar *ip = (uchar*)src;           /* input pointer */
+	uchar *iend = ip + compressedSize; /* input end */
+	uchar *op = (uchar*)dst;           /* output pointer */
+	uchar *oend = op + dstCapacity;    /* output end */
+	uchar *match;
+	int token, litlen, matchlen;
+	int offset;
+
+	if(compressedSize <= 0 || dstCapacity <= 0)
+		return -1;
+
+	for(;;){
+		/* Get token */
+		if(ip >= iend)
+			return -1;
+		token = *ip++;
+
+		/* Decode literal length */
+		litlen = readvlen(&ip, iend, token >> 4);
+		if(litlen < 0)
+			return -1;
+
+		/*
+		 * Copy literals.  The length is checked against the room left
+		 * on both sides instead of forming op + litlen or ip + litlen,
+		 * so a bogus length never yields an out-of-range pointer.
+		 */
+		if(litlen > oend - op || litlen > iend - ip)
+			return -1;  /* overflow */
+		memmove(op, ip, litlen);
+		op += litlen;
+		ip += litlen;
+
+		/*
+		 * Check for end of block: the final sequence carries literals
+		 * only, so running out of input here is the normal way out.
+		 */
+		if(ip >= iend)
+			break;
+
+		/* Decode match offset (little-endian) */
+		if(iend - ip < 2)
+			return -1;
+		offset = ip[0] | (ip[1] << 8);
+		ip += 2;
+
+		/* A zero offset is invalid, as is one reaching back before dst */
+		if(offset == 0 || offset > op - (uchar*)dst)
+			return -1;
+		match = op - offset;
+
+		/* Decode match length (stored minus LZ4_MIN_MATCH) */
+		matchlen = readvlen(&ip, iend, token & 0x0F);
+		if(matchlen < 0)
+			return -1;
+		matchlen += LZ4_MIN_MATCH;
+
+		/* Copy match */
+		if(matchlen > oend - op)
+			return -1;  /* output overflow */
+
+		/*
+		 * Copy match bytes - must handle overlapping copies
+		 * (when offset < matchlen, we repeat the pattern)
+		 */
+		if(offset >= matchlen){
+			/* Non-overlapping - fast copy */
+			memmove(op, match, matchlen);
+			op += matchlen;
+		} else {
+			/* Overlapping - copy byte by byte */
+			while(matchlen-- > 0)
+				*op++ = *match++;
+		}
+	}
+
+	return op - (uchar*)dst;
+}
+
+/*
+ * LZ4_compressBound - worst-case compressed size for inputSize bytes
+ * Only useful when compressing, which this Plan 9 code does not do
+ */
+int
+LZ4_compressBound(int inputSize)
+{
+	return 16 + inputSize/255 + inputSize;
+}
--- /dev/null
+++ b/plan9/lz4.h
@@ -1,0 +1,11 @@
+/*
+ * lz4.h - Minimal LZ4 decompressor for Plan 9
+ *
+ * Only implements decompression - compression happens on Linux side.
+ */
+
+/* Decompress one LZ4 block of compressedSize bytes from src into dst (room for dstCapacity bytes). Returns decompressed size or negative on error. */
+int LZ4_decompress_safe(char *src, char *dst, int compressedSize, int dstCapacity);
+
+/* Upper bound on the compressed size of inputSize bytes (only needed for compression) */
+int LZ4_compressBound(int inputSize);
--- /dev/null
+++ b/plan9/mkfile
@@ -1,0 +1,13 @@
+</$objtype/mkfile
+
+TARG=drawserv
+# Objects that make up $TARG: the display server and the bundled LZ4 decoder.
+OFILES=\
+	drawserv.$O\
+	lz4.$O\
+
+CFLAGS=-FTVw
+# BIN is consumed by the mkone rules included below - presumably the install directory; confirm against mkone.
+BIN=/$objtype/bin
+
+</sys/src/cmd/mkone
--