shithub: wl9m

ref: 9c25e3a38f465444d684a58a24f7567e6b59299c
dir: /linux/p9wl.c/

View raw version
/*
 * p9wl_udp.c - Wayland compositor with UDP transport to Plan 9
 *
 * Based on working p9wl.c (TCP version), converted to UDP.
 * Uses damage tracking from wlroots, LZ4 compression, NO XOR delta.
 *
 * Build:
 *   gcc -O2 -o p9wl_udp p9wl_udp.c \
 *       $(pkg-config --cflags --libs wlroots-0.19 wayland-server xkbcommon pixman-1) \
 *       -llz4 -lm -DWLR_USE_UNSTABLE
 *
 * For wlroots-0.18:
 *   gcc -O2 -o p9wl_udp p9wl_udp.c \
 *       $(pkg-config --cflags --libs wlroots-0.18 wayland-server xkbcommon pixman-1) \
 *       -llz4 -lm -DWLR_USE_UNSTABLE
 */

#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <stdbool.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <poll.h>
#include <time.h>
#include <signal.h>
#include <linux/input-event-codes.h>
#include <pixman.h>
#include <lz4.h>

#include <wayland-server-core.h>
#include <wlr/backend.h>
#include <wlr/backend/headless.h>
#include <wlr/render/allocator.h>
#include <wlr/render/wlr_renderer.h>
#include <wlr/types/wlr_buffer.h>
#include <wlr/types/wlr_compositor.h>
#include <wlr/types/wlr_cursor.h>
#include <wlr/types/wlr_data_device.h>
#include <wlr/types/wlr_output.h>
#include <wlr/types/wlr_output_layout.h>
#include <wlr/types/wlr_scene.h>
#include <wlr/types/wlr_seat.h>
#include <wlr/types/wlr_subcompositor.h>
#include <wlr/types/wlr_xcursor_manager.h>
#include <wlr/types/wlr_xdg_shell.h>
#include <wlr/types/wlr_keyboard.h>
#include <wlr/interfaces/wlr_keyboard.h>
#include <wlr/util/log.h>
#include <xkbcommon/xkbcommon.h>

/*
 * UDP protocol constants.
 * Every packet starts with a P9WL_HDR_SIZE-byte header whose fields sit at
 * the HDR_* offsets below; multi-byte fields are written little-endian by
 * PUT16/PUT32.
 */
#define P9WL_MAGIC        0x50395749  /* Bytes 'P','9','W','I', most significant first */
#define P9WL_VERSION      1
#define P9WL_HDR_SIZE     20
#define P9WL_MTU          1400
#define P9WL_MAX_CHUNK    (P9WL_MTU - 44)  /* Largest tile payload carried in one packet */
#define TILE_SIZE         16  /* Tile edge length in pixels */

/* Message types (header byte HDR_TYPE) */
#define P9WL_SIZE_REQ     0x01   /* Sent by get_size() to ask for the screen size */
#define P9WL_SIZE_RESP    0x02   /* Reply to SIZE_REQ: 16-bit width and height at offsets 20/22 */
#define P9WL_TILE         0x10   /* Tile pixel data, sent by send_tile()/send_tile_with_copies() */
#define P9WL_FLUSH        0x15   /* Header-only packet sent by send_flush() */
#define P9WL_NACK         0x16
#define P9WL_SCROLL       0x17
#define P9WL_MOUSE        0x20   /* Incoming: 16-bit x, y at offsets 20/22, button bits at 24 */
#define P9WL_KEY          0x21   /* Incoming: 32-bit rune at offset 20 */
#define P9WL_RESIZE       0x22   /* Incoming: 16-bit width, height at offsets 20/22 */

/* Flags (header byte HDR_FLAGS) */
#define P9WL_FLAG_COMPRESSED  0x01   /* Tile payload is LZ4-compressed */
#define P9WL_FLAG_HAS_COPIES  0x02   /* Payload is followed by a count and list of extra positions */

/* Header field offsets (byte 7 is written as 0 by build_header) */
#define HDR_MAGIC     0
#define HDR_VERSION   4
#define HDR_TYPE      5
#define HDR_FLAGS     6
#define HDR_SEQ       8
#define HDR_FRAME     12
#define HDR_TIMESTAMP 16

/* Configuration constants */
#define FULL_FRAME_INTERVAL_MS   5000   /* Force full frame every 5 seconds */
#define MIN_BATCH_DELAY_US       1    /* Minimum delay between batch packets */
#define PACKET_DELAY_US          0      /* Inter-packet delay in send_pkt to prevent overflow (0 = none) */
#define COMPRESSION_THRESHOLD    75     /* Compress if result < 75% of original */
#define BATCH_INTERVAL_MS        33     /* Spread tiles over 33 ms */
#define SIZE_REQ_ATTEMPTS        10     /* Number of size request retries */
#define SIZE_REQ_TIMEOUT_MS      1000   /* Timeout per size request attempt */
#define MAX_SCREEN_DIM           8192   /* Maximum screen dimension */

/*
 * Little-endian field accessors for packet buffers.
 *
 * GET16/GET32 read an unsigned 16/32-bit value from the bytes at p.
 * PUT16/PUT32 store the low 16/32 bits of v at p.  The value argument is
 * evaluated exactly once, so an argument with side effects or one that
 * reads a clock (e.g. PUT32(p, now_ms())) produces a consistent value.
 * The pointer argument p is still expanded once per byte and must be free
 * of side effects.
 */
#define GET16(p) ((uint16_t)(p)[0] | ((uint16_t)(p)[1]<<8))
#define GET32(p) ((uint32_t)(p)[0] | ((uint32_t)(p)[1]<<8) | ((uint32_t)(p)[2]<<16) | ((uint32_t)(p)[3]<<24))
#define PUT16(p, v) do { uint16_t put16_v_ = (uint16_t)(v); (p)[0]=(uint8_t)(put16_v_); (p)[1]=(uint8_t)(put16_v_>>8); } while(0)
#define PUT32(p, v) do { uint32_t put32_v_ = (uint32_t)(v); (p)[0]=(uint8_t)(put32_v_); (p)[1]=(uint8_t)(put32_v_>>8); (p)[2]=(uint8_t)(put32_v_>>16); (p)[3]=(uint8_t)(put32_v_>>24); } while(0)

/*
 * Plan 9 special key runes.
 * The navigation keys are KF (0xF000) OR'ed with a small index; Kdel is
 * 0x7F.  keymap[] below translates these runes to Linux KEY_* codes.
 */
enum {
    KF = 0xF000,
    Khome = KF|13, Kup = KF|14, Kpgup = KF|15,
    Kleft = KF|17, Kright = KF|18,
    Kdown = KF|19, Kend = KF|20, Kpgdown = KF|21,
    Kins = KF|22, Kdel = 0x7F,
};

/*
 * Key mapping.
 * Each entry translates one incoming rune to a Linux KEY_* code (from
 * <linux/input-event-codes.h>); `shift` is 1 when handle_key() must report
 * Shift as depressed around the key press.  The table is terminated by an
 * all-zero entry: lookups stop at the first entry whose keycode is 0.
 * Runes 10 and 13 both map to KEY_ENTER.
 */
struct key_map { int rune; int keycode; int shift; };
static const struct key_map keymap[] = {
    {8, KEY_BACKSPACE, 0}, {9, KEY_TAB, 0}, {10, KEY_ENTER, 0}, {13, KEY_ENTER, 0},
    {27, KEY_ESC, 0}, {Kdel, KEY_DELETE, 0},
    {Khome, KEY_HOME, 0}, {Kup, KEY_UP, 0}, {Kpgup, KEY_PAGEUP, 0},
    {Kleft, KEY_LEFT, 0}, {Kright, KEY_RIGHT, 0},
    {Kdown, KEY_DOWN, 0}, {Kend, KEY_END, 0}, {Kpgdown, KEY_PAGEDOWN, 0},
    {Kins, KEY_INSERT, 0},
    {'a', KEY_A, 0}, {'b', KEY_B, 0}, {'c', KEY_C, 0}, {'d', KEY_D, 0},
    {'e', KEY_E, 0}, {'f', KEY_F, 0}, {'g', KEY_G, 0}, {'h', KEY_H, 0},
    {'i', KEY_I, 0}, {'j', KEY_J, 0}, {'k', KEY_K, 0}, {'l', KEY_L, 0},
    {'m', KEY_M, 0}, {'n', KEY_N, 0}, {'o', KEY_O, 0}, {'p', KEY_P, 0},
    {'q', KEY_Q, 0}, {'r', KEY_R, 0}, {'s', KEY_S, 0}, {'t', KEY_T, 0},
    {'u', KEY_U, 0}, {'v', KEY_V, 0}, {'w', KEY_W, 0}, {'x', KEY_X, 0},
    {'y', KEY_Y, 0}, {'z', KEY_Z, 0},
    {'A', KEY_A, 1}, {'B', KEY_B, 1}, {'C', KEY_C, 1}, {'D', KEY_D, 1},
    {'E', KEY_E, 1}, {'F', KEY_F, 1}, {'G', KEY_G, 1}, {'H', KEY_H, 1},
    {'I', KEY_I, 1}, {'J', KEY_J, 1}, {'K', KEY_K, 1}, {'L', KEY_L, 1},
    {'M', KEY_M, 1}, {'N', KEY_N, 1}, {'O', KEY_O, 1}, {'P', KEY_P, 1},
    {'Q', KEY_Q, 1}, {'R', KEY_R, 1}, {'S', KEY_S, 1}, {'T', KEY_T, 1},
    {'U', KEY_U, 1}, {'V', KEY_V, 1}, {'W', KEY_W, 1}, {'X', KEY_X, 1},
    {'Y', KEY_Y, 1}, {'Z', KEY_Z, 1},
    {'0', KEY_0, 0}, {'1', KEY_1, 0}, {'2', KEY_2, 0}, {'3', KEY_3, 0},
    {'4', KEY_4, 0}, {'5', KEY_5, 0}, {'6', KEY_6, 0}, {'7', KEY_7, 0},
    {'8', KEY_8, 0}, {'9', KEY_9, 0},
    {' ', KEY_SPACE, 0}, {'!', KEY_1, 1}, {'@', KEY_2, 1}, {'#', KEY_3, 1},
    {'$', KEY_4, 1}, {'%', KEY_5, 1}, {'^', KEY_6, 1}, {'&', KEY_7, 1},
    {'*', KEY_8, 1}, {'(', KEY_9, 1}, {')', KEY_0, 1},
    {'-', KEY_MINUS, 0}, {'_', KEY_MINUS, 1}, {'=', KEY_EQUAL, 0}, {'+', KEY_EQUAL, 1},
    {'[', KEY_LEFTBRACE, 0}, {'{', KEY_LEFTBRACE, 1},
    {']', KEY_RIGHTBRACE, 0}, {'}', KEY_RIGHTBRACE, 1},
    {'\\', KEY_BACKSLASH, 0}, {'|', KEY_BACKSLASH, 1},
    {';', KEY_SEMICOLON, 0}, {':', KEY_SEMICOLON, 1},
    {'\'', KEY_APOSTROPHE, 0}, {'"', KEY_APOSTROPHE, 1},
    {',', KEY_COMMA, 0}, {'<', KEY_COMMA, 1},
    {'.', KEY_DOT, 0}, {'>', KEY_DOT, 1},
    {'/', KEY_SLASH, 0}, {'?', KEY_SLASH, 1},
    {'`', KEY_GRAVE, 0}, {'~', KEY_GRAVE, 1},
    {0, 0, 0}   /* Terminator */
};

/*
 * Hash a run of pixels for tile deduplication.
 *
 * This is an FNV-1a style hash (offset basis 2166136261, prime 16777619)
 * that mixes whole 32-bit words rather than individual bytes, so its
 * output differs from byte-wise FNV-1a.
 *
 * pixels: `count` 32-bit pixels; only read, never modified.
 * count:  number of pixels; values <= 0 yield the offset basis.
 * Returns the 32-bit hash.
 */
static uint32_t hash_tile(const uint32_t *pixels, int count) {
    uint32_t hash = 2166136261u;
    for (int i = 0; i < count; i++) {
        hash ^= pixels[i];
        hash *= 16777619u;
    }
    return hash;
}
 
 

/*
 * Tile group for deduplication - tiles with same hash.
 * send_frame() sends the primary tile's pixels once and lists the other
 * members as copy destinations.
 */
#define MAX_COPIES_PER_GROUP 4096   /* Entries allocated for copy_x / copy_y */
struct tile_group {
    uint32_t hash;       /* Hash shared by every tile in the group */
    int primary_idx;     /* First tile index (the one we'll actually send) */
    int *copy_x;         /* Dynamically allocated; pixel x of each copy destination */
    int *copy_y;         /* Pixel y of each copy destination, parallel to copy_x */
    int num_copies;      /* Additional copies (not including primary) */
    int copy_capacity;
};

/*
 * Per-frame deduplication state: a fixed table of groups plus two arrays
 * indexed by tile index (ty * tiles_x + tx).
 */
#define MAX_GROUPS 4096
struct tile_groups {
    struct tile_group groups[MAX_GROUPS];
    int num_groups;         /* Groups in use for the current frame */
    uint32_t *tile_hashes;  /* Hash for each tile index */
    int *tile_group;        /* Which group each tile belongs to (-1 if none) */
};

/*
 * One XDG toplevel window tracked by the compositor.
 * Allocated in new_toplevel() and freed in tl_destroy().
 */
struct toplevel {
    struct wl_list link;                 /* Entry in server.toplevels */
    struct wlr_xdg_toplevel *xdg;
    struct wlr_scene_tree *scene_tree;   /* Scene node created for the surface */
    struct wl_listener map, unmap, commit, destroy;   /* Handled by tl_map/tl_unmap/tl_commit/tl_destroy */
    struct server *server;               /* Owning compositor state */
};

/*
 * Global compositor state: wlroots objects, the UDP link to the Plan 9
 * side, and the frame/tile buffers used to diff and transmit the screen.
 */
struct server {
    struct wl_display *display;
    struct wlr_backend *backend;
    struct wlr_renderer *renderer;
    struct wlr_allocator *allocator;
    struct wlr_scene *scene;
    struct wlr_scene_output *scene_output;
    struct wlr_output_layout *output_layout;
    struct wlr_output *output;
    struct wlr_xdg_shell *xdg_shell;
    struct wlr_seat *seat;
    struct wlr_cursor *cursor;
    struct wlr_xcursor_manager *cursor_mgr;
    struct wlr_keyboard virtual_kb;     /* Keyboard used to deliver remote key events to the seat */
    
    struct wl_listener new_output, output_frame, output_destroy;
    struct wl_listener new_xdg_toplevel;
    struct wl_listener new_input;
    struct wl_list toplevels;           /* List of struct toplevel, linked through toplevel.link */
    
    int udp_fd;                         /* UDP socket used for all sends and receives */
    struct sockaddr_in p9_addr;         /* Destination address for send_pkt() */
    
    int width, height;                  /* Screen size in pixels */
    uint32_t send_seq, frame_seq;       /* Per-packet and per-frame sequence numbers placed in headers */
    
    uint32_t *framebuf;                 /* Current frame, width * height pixels */
    uint32_t *prev_framebuf;            /* Pixels as last transmitted; same size as framebuf */
    uint32_t *tilebuf;      /* For XRGB->XBGR conversion */
    char *compbuf;                      /* LZ4 output buffer */
    int compbuf_size;                   /* Capacity of compbuf in bytes */
    uint8_t *sendbuf;                   /* Packet assembly buffer */
    
    int tiles_x, tiles_y;               /* Tile grid size: ceil(width or height / TILE_SIZE) */
    
    /* Damage tracking: bounding box, x2/y2 are exclusive */
    int damage_x1, damage_y1, damage_x2, damage_y2;
    int has_damage;
    int force_full_frame;               /* When set, the next send_frame() sends every tile */
    uint32_t last_full_frame_ms;        /* now_ms() time of the last forced full frame */
    
    /* Paced batch sending */
    struct wl_event_source *batch_timer; /* Armed by send_frame() after tiles are sent */
    int *pending_tiles;      /* Tile indices to send */
    int num_pending;         /* How many tiles pending */
    int batch_idx;           /* Current index in pending_tiles */
    int batch_active;        /* Currently sending a batch */
    uint32_t batch_start_us; /* When batch started (microseconds) */
    int batch_interval_us;   /* Total time for batch (microseconds) */
    
    /* Tile deduplication */
    struct tile_groups dedup;
    
    /* Global frame counter (unified across functions) */
    int global_frame_count;
};

/*
 * Monotonic time in milliseconds, truncated to 32 bits (wraps roughly
 * every 49.7 days).  Callers compare values by unsigned subtraction.
 *
 * The arithmetic is done in 64 bits so that tv_sec * 1000 cannot overflow
 * a signed 32-bit time_t.
 */
static uint32_t now_ms(void) {
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    uint64_t ms = (uint64_t)ts.tv_sec * 1000u + (uint64_t)ts.tv_nsec / 1000000u;
    return (uint32_t)ms;
}

/* Monotonic time in microseconds as a 64-bit value. */
static uint64_t now_us(void) {
    struct timespec mono;
    clock_gettime(CLOCK_MONOTONIC, &mono);

    uint64_t from_seconds = (uint64_t)mono.tv_sec * 1000000;
    uint64_t from_nanos = mono.tv_nsec / 1000;
    return from_seconds + from_nanos;
}

/*
 * Fill in the 20-byte protocol header at the start of pkt.
 *
 * pkt:   buffer with room for at least P9WL_HDR_SIZE bytes
 * type:  message type (P9WL_TILE, P9WL_FLUSH, ...)
 * flags: P9WL_FLAG_* bits
 * seq:   packet sequence number
 * frame: frame sequence number
 *
 * The timestamp field is set to the current now_ms() value.
 */
static void build_header(uint8_t *pkt, uint8_t type, uint8_t flags,
                         uint32_t seq, uint32_t frame) {
    /* Read the clock once: PUT32 may expand its value argument once per
     * byte, which would otherwise mix bytes from different readings. */
    uint32_t timestamp = now_ms();

    PUT32(pkt + HDR_MAGIC, P9WL_MAGIC);
    pkt[HDR_VERSION] = P9WL_VERSION;
    pkt[HDR_TYPE] = type;
    pkt[HDR_FLAGS] = flags;
    pkt[7] = 0;  /* Unused header byte */
    PUT32(pkt + HDR_SEQ, seq);
    PUT32(pkt + HDR_FRAME, frame);
    PUT32(pkt + HDR_TIMESTAMP, timestamp);
}

/*
 * Transmit one datagram of len bytes to the Plan 9 peer (s->p9_addr).
 *
 * Delivery is best-effort: a failed sendto() is not reported or retried.
 * When PACKET_DELAY_US is non-zero the function then sleeps that long to
 * pace the sender; with a delay of 0 no sleep call is made at all.
 */
static void send_pkt(struct server *s, uint8_t *pkt, int len) {
    (void)sendto(s->udp_fd, pkt, len, 0,
                 (struct sockaddr*)&s->p9_addr, sizeof(s->p9_addr));

    /* nanosleep() is used rather than usleep(): usleep() was removed from
     * POSIX.1-2008 and is not declared when _POSIX_C_SOURCE is 200809L. */
    if (PACKET_DELAY_US > 0) {
        struct timespec pause = {
            .tv_sec = PACKET_DELAY_US / 1000000,
            .tv_nsec = (PACKET_DELAY_US % 1000000) * 1000L,
        };
        nanosleep(&pause, NULL);
    }
}

/*
 * Send a header-only P9WL_FLUSH packet for the current frame, consuming
 * one packet sequence number.
 */
static void send_flush(struct server *s) {
    uint8_t hdr[20];
    uint32_t seq = s->send_seq++;

    build_header(hdr, P9WL_FLUSH, 0, seq, s->frame_seq);
    send_pkt(s, hdr, (int)sizeof hdr);
}
  
/*
 * Transmit the tile at grid position (tx, ty).
 *
 * The tile's pixels are converted from XRGB to XBGR into s->tilebuf, and
 * the matching region of s->prev_framebuf is updated from s->framebuf.
 * The converted pixels are LZ4-compressed; the compressed form is used
 * only when it is smaller than COMPRESSION_THRESHOLD percent of the raw
 * size.  The payload is then sent as one or more P9WL_TILE packets of at
 * most P9WL_MAX_CHUNK payload bytes each.
 *
 * Returns 1 when the tile was sent, 0 when (tx, ty) is outside the grid
 * or the tile has no area.
 */
static int send_tile(struct server *s, int tx, int ty) {
    if (tx < 0 || ty < 0 || tx >= s->tiles_x || ty >= s->tiles_y)
        return 0;

    const int px = tx * TILE_SIZE;
    const int py = ty * TILE_SIZE;

    /* Tiles in the last column/row are clipped to the screen edge */
    int tile_w = TILE_SIZE;
    int tile_h = TILE_SIZE;
    if (tx == s->tiles_x - 1)
        tile_w = s->width - px;
    if (ty == s->tiles_y - 1)
        tile_h = s->height - py;
    if (tile_w <= 0 || tile_h <= 0)
        return 0;

    for (int r = 0; r < tile_h; r++) {
        const int base = (py + r) * s->width + px;
        const uint32_t *src = s->framebuf + base;
        uint32_t *out = s->tilebuf + r * tile_w;

        for (int c = 0; c < tile_w; c++) {
            const uint32_t v = src[c];
            const uint32_t red_low = (v >> 16) & 0xFF;
            const uint32_t blue_high = (v & 0xFF) << 16;
            /* Keep X and G in place, exchange R and B */
            out[c] = (v & 0xFF00FF00) | red_low | blue_high;
        }
        memcpy(s->prev_framebuf + base, src, tile_w * 4);
    }

    const int raw_bytes = tile_w * tile_h * 4;
    const int packed = LZ4_compress_default((char*)s->tilebuf, s->compbuf,
                                            raw_bytes, s->compbuf_size);
    const int use_lz4 = packed > 0 &&
                        packed < (raw_bytes * COMPRESSION_THRESHOLD) / 100;

    const uint8_t *payload = use_lz4 ? (const uint8_t*)s->compbuf
                                     : (const uint8_t*)s->tilebuf;
    const int payload_len = use_lz4 ? packed : raw_bytes;
    const uint8_t flags = use_lz4 ? P9WL_FLAG_COMPRESSED : 0;

    const int nchunks = (payload_len + P9WL_MAX_CHUNK - 1) / P9WL_MAX_CHUNK;
    int sent = 0;

    for (int idx = 0; idx < nchunks; idx++) {
        const int left = payload_len - sent;
        const int this_len = (idx == nchunks - 1) ? left : P9WL_MAX_CHUNK;
        uint8_t *out_pkt = s->sendbuf;

        build_header(out_pkt, P9WL_TILE, flags, s->send_seq++, s->frame_seq);

        /* Tile sub-header: rectangle, chunk index/count, sizes */
        PUT16(out_pkt + 20, px);
        PUT16(out_pkt + 22, py);
        PUT16(out_pkt + 24, tile_w);
        PUT16(out_pkt + 26, tile_h);
        PUT16(out_pkt + 28, idx);
        PUT16(out_pkt + 30, nchunks);
        PUT32(out_pkt + 32, payload_len);
        PUT32(out_pkt + 36, this_len);
        memcpy(out_pkt + 40, payload + sent, this_len);

        send_pkt(s, out_pkt, 40 + this_len);
        sent += this_len;
    }
    return 1;
}

/* Send a tile with copy destinations for duplicates */
static void send_tile_with_copies(struct server *s, int tx, int ty, 
                                   int *copy_x, int *copy_y, int num_copies) {
    /* Bounds check */
    if (tx < 0 || tx >= s->tiles_x || ty < 0 || ty >= s->tiles_y)
        return;
    
    int x = tx * TILE_SIZE;
    int y = ty * TILE_SIZE;
    int w = (tx == s->tiles_x - 1) ? (s->width - x) : TILE_SIZE;
    int h = (ty == s->tiles_y - 1) ? (s->height - y) : TILE_SIZE;
    if (w <= 0 || h <= 0) return;
    
    /* Convert XRGB->XBGR and update prev for primary tile */
    for (int row = 0; row < h; row++) {
        uint32_t *curr = &s->framebuf[(y + row) * s->width + x];
        uint32_t *prev = &s->prev_framebuf[(y + row) * s->width + x];
        uint32_t *dst = &s->tilebuf[row * w];
        for (int col = 0; col < w; col++) {
            uint32_t p = curr[col];
            dst[col] = (p & 0xFF00FF00) | ((p >> 16) & 0xFF) | ((p & 0xFF) << 16);
        }
        memcpy(prev, curr, w * 4);
    }
    
    /* Also update prev for copy destinations */
    for (int c = 0; c < num_copies; c++) {
        int cx = copy_x[c];
        int cy = copy_y[c];
        for (int row = 0; row < h; row++) {
            uint32_t *curr = &s->framebuf[(cy + row) * s->width + cx];
            uint32_t *prev = &s->prev_framebuf[(cy + row) * s->width + cx];
            memcpy(prev, curr, w * 4);
        }
    }
    
    int raw_size = w * h * 4;
    int comp_size = LZ4_compress_default((char*)s->tilebuf, s->compbuf, raw_size, s->compbuf_size);
    
    uint8_t *data;
    int data_size;
    int compressed = 0;
    
    if (comp_size > 0 && comp_size < (raw_size * COMPRESSION_THRESHOLD) / 100) {
        data = (uint8_t*)s->compbuf;
        data_size = comp_size;
        compressed = 1;
    } else {
        data = (uint8_t*)s->tilebuf;
        data_size = raw_size;
    }
    
    /* Build list of ALL positions (primary + copies) */
    int total_positions = 1 + num_copies;
    int *all_x = malloc(total_positions * sizeof(int));
    int *all_y = malloc(total_positions * sizeof(int));
    if (!all_x || !all_y) {
        free(all_x);
        free(all_y);
        /* Fallback: send without copies */
        send_tile(s, tx, ty);
        return;
    }
    all_x[0] = x;
    all_y[0] = y;
    for (int c = 0; c < num_copies; c++) {
        all_x[1 + c] = copy_x[c];
        all_y[1 + c] = copy_y[c];
    }
    
    /* Calculate how many copy positions fit per packet after bitmap */
    /* Packet: header(40) + data + count(2) + positions(4 each) */
    int avail = P9WL_MAX_CHUNK - data_size - 2;
    int copies_per_pkt = avail / 4;
    if (copies_per_pkt < 0) copies_per_pkt = 0;
    
    /* Send packets: each has bitmap at one position + list of additional positions */
    int pos_idx = 0;
    while (pos_idx < total_positions) {
        int primary_x = all_x[pos_idx];
        int primary_y = all_y[pos_idx];
        pos_idx++;
        
        /* How many additional positions in this packet? */
        int remaining = total_positions - pos_idx;
        int batch = (remaining > copies_per_pkt) ? copies_per_pkt : remaining;
        
        int flags = (compressed ? P9WL_FLAG_COMPRESSED : 0) |
                    (batch > 0 ? P9WL_FLAG_HAS_COPIES : 0);
        
        uint8_t *pkt = s->sendbuf;
        build_header(pkt, P9WL_TILE, flags, s->send_seq++, s->frame_seq);
        PUT16(pkt + 20, primary_x);
        PUT16(pkt + 22, primary_y);
        PUT16(pkt + 24, w);
        PUT16(pkt + 26, h);
        PUT16(pkt + 28, 0);
        PUT16(pkt + 30, 1);
        PUT32(pkt + 32, data_size);
        PUT32(pkt + 36, data_size);
        memcpy(pkt + 40, data, data_size);
        
        int pkt_len = 40 + data_size;
        
        if (batch > 0) {
            PUT16(pkt + pkt_len, batch);
            pkt_len += 2;
            for (int c = 0; c < batch; c++) {
                PUT16(pkt + pkt_len, all_x[pos_idx + c]);
                PUT16(pkt + pkt_len + 2, all_y[pos_idx + c]);
                pkt_len += 4;
            }
            pos_idx += batch;
        }
        
        send_pkt(s, pkt, pkt_len);
    }
    
    free(all_x);
    free(all_y);
}

/*
 * Send the tile with linear index tile_idx (row-major over the tile
 * grid).  Returns send_tile()'s result: 1 if sent, 0 otherwise.
 */
static int send_tile_idx(struct server *s, int tile_idx) {
    const int cols = s->tiles_x;
    const int grid_row = tile_idx / cols;
    const int grid_col = tile_idx % cols;

    return send_tile(s, grid_col, grid_row);
}

/* Free tile group allocations */
static void free_tile_groups(struct tile_groups *dedup) {
    for (int g = 0; g < MAX_GROUPS; g++) {
        free(dedup->groups[g].copy_x);
        free(dedup->groups[g].copy_y);
        dedup->groups[g].copy_x = NULL;
        dedup->groups[g].copy_y = NULL;
    }
    free(dedup->tile_hashes);
    free(dedup->tile_group);
    dedup->tile_hashes = NULL;
    dedup->tile_group = NULL;
    dedup->num_groups = 0;
}

/*
 * Timer callback: sends a FLUSH packet for the server passed as `data`.
 * Always returns 0.
 */
static int batch_timer_cb(void *data) {
    send_flush((struct server *)data);
    return 0;
}

/*
 * Delay in milliseconds before the FLUSH that follows a batch of tiles:
 * 5 ms plus 1 ms per 100 tiles, capped at 30 ms.
 */
static int calc_flush_delay(int num_tiles) {
    const int cap_ms = 30;
    int ms = num_tiles / 100 + 5;

    if (ms > cap_ms)
        ms = cap_ms;
    return ms;
}

/*
 * Diff the current frame against the previously sent one and transmit
 * the tiles that changed.
 *
 * Steps:
 *  1. Decide whether anything must be sent: the first frame and frames
 *     more than FULL_FRAME_INTERVAL_MS after the last full frame are
 *     forced full; otherwise the call returns when there is no damage.
 *  2. Collect into s->pending_tiles every tile that intersects the damage
 *     box and differs from s->prev_framebuf (or every tile on a full
 *     frame).
 *  3. Hash each pending tile and group tiles with equal hashes; the first
 *     tile of a group is the primary, the rest are copy destinations.
 *  4. Send each group, then arm s->batch_timer with calc_flush_delay().
 *
 * A tile that cannot be placed in a group (group table full, copy list
 * full, or copy list allocation failure) is sent directly with
 * send_tile(), so every pending tile is transmitted.
 *
 * Clears s->has_damage and s->force_full_frame once tiles are collected.
 */
static void send_frame(struct server *s) {
    int total_tiles = s->tiles_x * s->tiles_y;

    /* The pixel buffers can be NULL after a failed allocation; without
     * them there is nothing that can safely be compared or sent. */
    if (total_tiles <= 0 || !s->framebuf || !s->prev_framebuf ||
        !s->tilebuf || !s->pending_tiles) {
        return;
    }
    
    s->global_frame_count++;
    
    /* Force full frame for first frame to ensure clean startup */
    if (s->global_frame_count <= 1) {
        s->force_full_frame = 1;
    }
    
    if (!s->has_damage && !s->force_full_frame) {
        /* Force full frame periodically to clear any stuck artifacts */
        uint32_t now = now_ms();
        if (now - s->last_full_frame_ms > FULL_FRAME_INTERVAL_MS) {
            s->force_full_frame = 1;
            s->last_full_frame_ms = now;
        } else {
            return;
        }
    }
    
    if (s->force_full_frame)
        s->last_full_frame_ms = now_ms();
    
    s->frame_seq++;
        
    /* Collect tiles that need sending into pending_tiles array */
    s->num_pending = 0;
    for (int ty = 0; ty < s->tiles_y; ty++) {
        for (int tx = 0; tx < s->tiles_x; tx++) {
            int x = tx * TILE_SIZE;
            int y = ty * TILE_SIZE;
            int w = (tx == s->tiles_x - 1) ? (s->width - x) : TILE_SIZE;
            int h = (ty == s->tiles_y - 1) ? (s->height - y) : TILE_SIZE;
            if (w <= 0 || h <= 0) continue;
            
            /* Skip tiles entirely outside the damage box */
            if (!s->force_full_frame && s->has_damage) {
                if (x + w <= s->damage_x1 || x >= s->damage_x2 ||
                    y + h <= s->damage_y1 || y >= s->damage_y2) {
                    continue;
                }
            }
            
            /* Check if changed */
            int changed = s->force_full_frame;
            if (!changed) {
                for (int row = 0; row < h; row++) {
                    uint32_t *curr = &s->framebuf[(y + row) * s->width + x];
                    uint32_t *prev = &s->prev_framebuf[(y + row) * s->width + x];
                    if (memcmp(curr, prev, w * 4) != 0) {
                        changed = 1;
                        break;
                    }
                }
            }
            
            if (changed) {
                s->pending_tiles[s->num_pending++] = ty * s->tiles_x + tx;
            }
        }
    }
    
    if (s->global_frame_count <= 10 || (s->global_frame_count % 60 == 0))
        wlr_log(WLR_INFO, "Frame %d: sending %d/%d tiles (force=%d damage=%d)", 
                s->global_frame_count, s->num_pending, total_tiles, s->force_full_frame, s->has_damage);
    
    s->has_damage = 0;
    s->force_full_frame = 0;
    
    if (s->num_pending == 0) {
        return;
    }
    
    /* Allocate hash tracking if needed.  Both arrays are required, so a
     * half-allocated pair is discarded and allocated again. */
    if (!s->dedup.tile_hashes || !s->dedup.tile_group) {
        free(s->dedup.tile_hashes);
        free(s->dedup.tile_group);
        s->dedup.tile_hashes = calloc(total_tiles, sizeof(uint32_t));
        s->dedup.tile_group = malloc(total_tiles * sizeof(int));
        if (!s->dedup.tile_hashes || !s->dedup.tile_group) {
            wlr_log(WLR_ERROR, "Failed to allocate dedup arrays");
            free(s->dedup.tile_hashes);
            free(s->dedup.tile_group);
            s->dedup.tile_hashes = NULL;
            s->dedup.tile_group = NULL;
            /* Fallback: send tiles without deduplication */
            for (int i = 0; i < s->num_pending; i++) {
                send_tile_idx(s, s->pending_tiles[i]);
            }
            wl_event_source_timer_update(s->batch_timer, calc_flush_delay(s->num_pending));
            return;
        }
    }
    
    /* Compute hashes for pending tiles - hash original XRGB data for efficiency */
    for (int i = 0; i < s->num_pending; i++) {
        int tile_idx = s->pending_tiles[i];
        int tx = tile_idx % s->tiles_x;
        int ty = tile_idx / s->tiles_x;
        int x = tx * TILE_SIZE;
        int y = ty * TILE_SIZE;
        int w = (tx == s->tiles_x - 1) ? (s->width - x) : TILE_SIZE;
        int h = (ty == s->tiles_y - 1) ? (s->height - y) : TILE_SIZE;
        
        /* Hash directly from framebuf (XRGB) - faster than converting first */
        uint32_t hash = 2166136261u;
        for (int row = 0; row < h; row++) {
            uint32_t *src = &s->framebuf[(y + row) * s->width + x];
            for (int col = 0; col < w; col++) {
                hash ^= src[col];
                hash *= 16777619u;
            }
        }
        s->dedup.tile_hashes[tile_idx] = hash;
    }
    
    /* Reset groups */
    for (int g = 0; g < s->dedup.num_groups; g++) {
        s->dedup.groups[g].num_copies = 0;
    }
    s->dedup.num_groups = 0;
    
    /* Clear group assignments */
    for (int i = 0; i < s->num_pending; i++) {
        s->dedup.tile_group[s->pending_tiles[i]] = -1;
    }
    
    /* Group pending tiles by hash */
    int direct_sends = 0;   /* Tiles sent outside any group */
    for (int i = 0; i < s->num_pending; i++) {
        int tile_idx = s->pending_tiles[i];
        int tx = tile_idx % s->tiles_x;
        int ty = tile_idx / s->tiles_x;
        int x = tx * TILE_SIZE;
        int y = ty * TILE_SIZE;
        
        uint32_t hash = s->dedup.tile_hashes[tile_idx];
        
        /* Find existing group with same hash */
        int found_group = -1;
        for (int g = 0; g < s->dedup.num_groups; g++) {
            if (s->dedup.groups[g].hash == hash) {
                found_group = g;
                break;
            }
        }
        
        int grouped = 0;
        if (found_group >= 0) {
            /* Add as copy to existing group */
            struct tile_group *grp = &s->dedup.groups[found_group];
            if (grp->num_copies < MAX_COPIES_PER_GROUP) {
                if (!grp->copy_x) {
                    grp->copy_x = malloc(MAX_COPIES_PER_GROUP * sizeof(int));
                    grp->copy_y = malloc(MAX_COPIES_PER_GROUP * sizeof(int));
                    if (!grp->copy_x || !grp->copy_y) {
                        free(grp->copy_x);
                        free(grp->copy_y);
                        grp->copy_x = NULL;
                        grp->copy_y = NULL;
                    }
                }
                if (grp->copy_x) {
                    grp->copy_x[grp->num_copies] = x;
                    grp->copy_y[grp->num_copies] = y;
                    grp->num_copies++;
                    s->dedup.tile_group[tile_idx] = found_group;
                    grouped = 1;
                }
            }
        } else if (s->dedup.num_groups < MAX_GROUPS) {
            /* Create new group */
            struct tile_group *grp = &s->dedup.groups[s->dedup.num_groups];
            grp->hash = hash;
            grp->primary_idx = tile_idx;
            grp->num_copies = 0;
            s->dedup.tile_group[tile_idx] = s->dedup.num_groups;
            s->dedup.num_groups++;
            grouped = 1;
        }

        /* A tile that could not be grouped must still reach the peer */
        if (!grouped) {
            direct_sends += send_tile(s, tx, ty);
        }
    }
    
    /* Send each group */
    for (int g = 0; g < s->dedup.num_groups; g++) {
        struct tile_group *grp = &s->dedup.groups[g];
        int tile_idx = grp->primary_idx;
        int tx = tile_idx % s->tiles_x;
        int ty = tile_idx / s->tiles_x;
        
        if (grp->num_copies > 0) {
            send_tile_with_copies(s, tx, ty, grp->copy_x, grp->copy_y, grp->num_copies);
        } else {
            send_tile(s, tx, ty);
        }
    }
    
    wl_event_source_timer_update(s->batch_timer,
                                 calc_flush_delay(s->dedup.num_groups + direct_sends));
}

static void handle_key(struct server *s, int rune) {
    const struct key_map *km = NULL;
    for (const struct key_map *m = keymap; m->keycode; m++) {
        if (m->rune == rune) { km = m; break; }
    }
    
    /* Keymap match takes priority */
    if (km) {
        uint32_t t = now_ms();
        wlr_seat_set_keyboard(s->seat, &s->virtual_kb);
        
        if (km->shift) {
            struct wlr_keyboard_modifiers mods = { .depressed = WLR_MODIFIER_SHIFT };
            wlr_seat_keyboard_notify_modifiers(s->seat, &mods);
        }
        wlr_seat_keyboard_notify_key(s->seat, t, km->keycode, WL_KEYBOARD_KEY_STATE_PRESSED);
        wlr_seat_keyboard_notify_key(s->seat, t+1, km->keycode, WL_KEYBOARD_KEY_STATE_RELEASED);
        if (km->shift) {
            struct wlr_keyboard_modifiers mods = {0};
            wlr_seat_keyboard_notify_modifiers(s->seat, &mods);
        }
        return;
    }
    
    /* Ctrl+letter for unmapped control characters */
    if (rune >= 1 && rune <= 26) {
        int keycode = KEY_A + rune - 1;
        uint32_t t = now_ms();
        struct wlr_keyboard_modifiers mods = { .depressed = WLR_MODIFIER_CTRL };
        wlr_seat_set_keyboard(s->seat, &s->virtual_kb);
        wlr_seat_keyboard_notify_modifiers(s->seat, &mods);
        wlr_seat_keyboard_notify_key(s->seat, t, keycode, WL_KEYBOARD_KEY_STATE_PRESSED);
        wlr_seat_keyboard_notify_key(s->seat, t+1, keycode, WL_KEYBOARD_KEY_STATE_RELEASED);
        mods.depressed = 0;
        wlr_seat_keyboard_notify_modifiers(s->seat, &mods);
        return;
    }
}
static void handle_mouse(struct server *s, int mx, int my, int buttons) {
    wlr_cursor_warp_absolute(s->cursor, NULL, (double)mx / s->width, (double)my / s->height);
    
    double sx, sy;
    struct wlr_surface *surface = NULL;
    struct wlr_scene_node *node = wlr_scene_node_at(&s->scene->tree.node,
                                                     s->cursor->x, s->cursor->y, &sx, &sy);
    if (node && node->type == WLR_SCENE_NODE_BUFFER) {
        struct wlr_scene_buffer *sb = wlr_scene_buffer_from_node(node);
        struct wlr_scene_surface *ss = wlr_scene_surface_try_from_buffer(sb);
        if (ss) surface = ss->surface;
    }
    
    uint32_t t = now_ms();
    static int last_buttons = 0;
    int changed = buttons ^ last_buttons;
    
    if (surface) {
        wlr_seat_pointer_notify_enter(s->seat, surface, sx, sy);
        wlr_seat_pointer_notify_motion(s->seat, t, sx, sy);
        if ((changed & 1) && (buttons & 1)) {
            wlr_seat_keyboard_notify_enter(s->seat, surface,
                s->virtual_kb.keycodes, s->virtual_kb.num_keycodes,
                &s->virtual_kb.modifiers);
        }
    } else {
        wlr_seat_pointer_clear_focus(s->seat);
    }
    
    if (changed & 1) wlr_seat_pointer_notify_button(s->seat, t, BTN_LEFT,
        (buttons & 1) ? WL_POINTER_BUTTON_STATE_PRESSED : WL_POINTER_BUTTON_STATE_RELEASED);
    if (changed & 2) wlr_seat_pointer_notify_button(s->seat, t, BTN_MIDDLE,
        (buttons & 2) ? WL_POINTER_BUTTON_STATE_PRESSED : WL_POINTER_BUTTON_STATE_RELEASED);
    if (changed & 4) wlr_seat_pointer_notify_button(s->seat, t, BTN_RIGHT,
        (buttons & 4) ? WL_POINTER_BUTTON_STATE_PRESSED : WL_POINTER_BUTTON_STATE_RELEASED);
    
    if (buttons & 8) wlr_seat_pointer_notify_axis(s->seat, t, WL_POINTER_AXIS_VERTICAL_SCROLL,
        -15, -1, WL_POINTER_AXIS_SOURCE_WHEEL, WL_POINTER_AXIS_RELATIVE_DIRECTION_IDENTICAL);
    if (buttons & 16) wlr_seat_pointer_notify_axis(s->seat, t, WL_POINTER_AXIS_VERTICAL_SCROLL,
        15, 1, WL_POINTER_AXIS_SOURCE_WHEEL, WL_POINTER_AXIS_RELATIVE_DIRECTION_IDENTICAL);
    
    last_buttons = buttons;
    wlr_seat_pointer_notify_frame(s->seat);
}

/*
 * Change the screen size to nw x nh pixels.
 *
 * Requests outside 1..MAX_SCREEN_DIM or equal to the current size are
 * ignored.  New frame, previous-frame, tile and pending-tile buffers are
 * allocated before anything is modified; if any allocation fails the
 * function logs an error and returns with the old size and buffers fully
 * intact.
 *
 * On success the buffers and tile grid are replaced, the dedup arrays are
 * reallocated (both or neither), a full frame is forced, the output mode
 * is updated, and every toplevel is resized and maximized.
 */
static void do_resize(struct server *s, int nw, int nh) {
    if (nw <= 0 || nh <= 0 || nw > MAX_SCREEN_DIM || nh > MAX_SCREEN_DIM) return;
    if (nw == s->width && nh == s->height) return;
    
    wlr_log(WLR_INFO, "Resize: %dx%d -> %dx%d", s->width, s->height, nw, nh);
    
    int tiles_x = (nw + TILE_SIZE - 1) / TILE_SIZE;
    int tiles_y = (nh + TILE_SIZE - 1) / TILE_SIZE;
    int total_tiles = tiles_x * tiles_y;
    size_t pixels = (size_t)nw * (size_t)nh;
    
    /* Allocate the replacements first so a failure changes nothing */
    uint32_t *framebuf = calloc(pixels, sizeof *framebuf);
    uint32_t *prev_framebuf = calloc(pixels, sizeof *prev_framebuf);
    uint32_t *tilebuf = malloc(TILE_SIZE * TILE_SIZE * 4);
    int *pending_tiles = malloc((size_t)total_tiles * sizeof *pending_tiles);
    
    if (!framebuf || !prev_framebuf || !tilebuf || !pending_tiles) {
        wlr_log(WLR_ERROR, "Resize allocation failed");
        free(framebuf);
        free(prev_framebuf);
        free(tilebuf);
        free(pending_tiles);
        return;
    }
    
    /* Commit: swap in the new buffers and geometry */
    free(s->framebuf);
    free(s->prev_framebuf);
    free(s->tilebuf);
    free(s->pending_tiles);
    s->framebuf = framebuf;
    s->prev_framebuf = prev_framebuf;
    s->tilebuf = tilebuf;
    s->pending_tiles = pending_tiles;
    s->num_pending = 0;   /* Old tile indices do not apply to the new grid */
    
    s->width = nw;
    s->height = nh;
    s->tiles_x = tiles_x;
    s->tiles_y = tiles_y;
    
    /* Free and reallocate dedup arrays */
    free_tile_groups(&s->dedup);
    s->dedup.tile_hashes = calloc(total_tiles, sizeof(uint32_t));
    s->dedup.tile_group = malloc(total_tiles * sizeof(int));
    if (!s->dedup.tile_hashes || !s->dedup.tile_group) {
        /* Keep the pair consistent: either both arrays exist or neither */
        free(s->dedup.tile_hashes);
        free(s->dedup.tile_group);
        s->dedup.tile_hashes = NULL;
        s->dedup.tile_group = NULL;
    }
    
    s->force_full_frame = 1;
    
    if (s->output) {
        struct wlr_output_state state;
        wlr_output_state_init(&state);
        wlr_output_state_set_enabled(&state, true);
        wlr_output_state_set_custom_mode(&state, nw, nh, 60000);
        if (!wlr_output_commit_state(s->output, &state)) {
            wlr_log(WLR_ERROR, "Failed to commit output mode %dx%d", nw, nh);
        }
        wlr_output_state_finish(&state);
    }
    
    struct toplevel *tl;
    wl_list_for_each(tl, &s->toplevels, link) {
        if (tl->xdg) {
            wlr_xdg_toplevel_set_size(tl->xdg, nw, nh);
            wlr_xdg_toplevel_set_maximized(tl->xdg, true);
        }
    }
}

/*
 * Validate and dispatch one received datagram.
 *
 * Packets shorter than the header, with the wrong magic, or with a
 * different protocol version are dropped (the latter with a debug log).
 * MOUSE, KEY and RESIZE messages are forwarded to their handlers when
 * the packet is long enough; all other types are ignored.
 */
static void handle_packet(struct server *s, uint8_t *buf, size_t len) {
    if (len < P9WL_HDR_SIZE || GET32(buf + HDR_MAGIC) != P9WL_MAGIC)
        return;

    if (buf[HDR_VERSION] != P9WL_VERSION) {
        wlr_log(WLR_DEBUG, "Protocol version mismatch: got %d, want %d",
                buf[HDR_VERSION], P9WL_VERSION);
        return;
    }

    const uint8_t msg_type = buf[HDR_TYPE];
    uint8_t *body = buf + 20;   /* Message fields follow the header */

    if (msg_type == P9WL_MOUSE) {
        if (len >= 28) {
            int px = (int16_t)GET16(body);
            int py = (int16_t)GET16(body + 2);
            handle_mouse(s, px, py, body[4]);
        }
    } else if (msg_type == P9WL_KEY) {
        if (len >= 28) {
            handle_key(s, (int32_t)GET32(body));
        }
    } else if (msg_type == P9WL_RESIZE) {
        if (len >= 24) {
            int new_w = GET16(body);
            int new_h = GET16(body + 2);
            do_resize(s, new_w, new_h);
        }
    }
}

/*
 * Event-loop callback for the UDP socket: reads datagrams from s->udp_fd
 * without blocking and hands each to handle_packet() until recv() returns
 * zero or an error.  The fd and mask arguments are not used.  Always
 * returns 0.
 */
static int handle_udp(int fd, uint32_t mask, void *data) {
    struct server *srv = data;
    uint8_t dgram[2048];
    ssize_t got;

    (void)fd;
    (void)mask;

    while ((got = recv(srv->udp_fd, dgram, sizeof(dgram), MSG_DONTWAIT)) > 0) {
        handle_packet(srv, dgram, got);
    }
    return 0;
}

/*
 * Ask the peer at *addr for its screen size.
 *
 * Sends a 24-byte P9WL_SIZE_REQ up to SIZE_REQ_ATTEMPTS times, waiting up
 * to SIZE_REQ_TIMEOUT_MS after each send for a datagram.  A datagram of
 * at least 24 bytes with the protocol magic and type P9WL_SIZE_RESP ends
 * the exchange; anything else uses up the attempt.
 *
 * On success stores the reported width and height in *w and *h and
 * returns 0.  Returns -1 when no valid response arrived.
 */
static int get_size(int udp_fd, struct sockaddr_in *addr, int *w, int *h) {
    uint8_t pkt[24];
    /* Read the clock once: PUT32 may expand its value argument once per
     * byte, which would otherwise mix bytes from different readings. */
    uint32_t timestamp = now_ms();

    PUT32(pkt + HDR_MAGIC, P9WL_MAGIC);
    pkt[HDR_VERSION] = P9WL_VERSION;
    pkt[HDR_TYPE] = P9WL_SIZE_REQ;
    pkt[HDR_FLAGS] = 0;
    pkt[7] = 0;
    PUT32(pkt + HDR_SEQ, 0);
    PUT32(pkt + HDR_FRAME, 0);
    PUT32(pkt + HDR_TIMESTAMP, timestamp);
    PUT16(pkt + 20, 0);
    PUT16(pkt + 22, 0);
    
    for (int attempt = 0; attempt < SIZE_REQ_ATTEMPTS; attempt++) {
        wlr_log(WLR_INFO, "SIZE_REQ attempt %d/%d", attempt + 1, SIZE_REQ_ATTEMPTS);
        sendto(udp_fd, pkt, 24, 0, (struct sockaddr*)addr, sizeof(*addr));
        struct pollfd pfd = { .fd = udp_fd, .events = POLLIN };
        if (poll(&pfd, 1, SIZE_REQ_TIMEOUT_MS) > 0) {
            uint8_t buf[64];
            ssize_t n = recv(udp_fd, buf, sizeof(buf), 0);
            if (n >= 24 && GET32(buf + HDR_MAGIC) == P9WL_MAGIC &&
                buf[HDR_TYPE] == P9WL_SIZE_RESP) {
                *w = GET16(buf + 20);
                *h = GET16(buf + 22);
                wlr_log(WLR_INFO, "SIZE_RESP: %dx%d", *w, *h);
                return 0;
            }
        }
    }
    return -1;
}

/* Toplevel callbacks */
static void tl_map(struct wl_listener *l, void *d) {
    struct toplevel *tl = wl_container_of(l, tl, map);
    wlr_xdg_toplevel_set_size(tl->xdg, tl->server->width, tl->server->height);
    wlr_xdg_toplevel_set_maximized(tl->xdg, true);
    wlr_seat_keyboard_notify_enter(tl->server->seat, tl->xdg->base->surface,
        tl->server->virtual_kb.keycodes, tl->server->virtual_kb.num_keycodes,
        &tl->server->virtual_kb.modifiers);
}
/* Unmap handler for a toplevel: intentionally does nothing. */
static void tl_unmap(struct wl_listener *l, void *d) {
    (void)l;
    (void)d;
}
static void tl_commit(struct wl_listener *l, void *d) {
    struct toplevel *tl = wl_container_of(l, tl, commit);
    if (tl->xdg->base->initial_commit) {
        wlr_xdg_toplevel_set_size(tl->xdg, tl->server->width, tl->server->height);
        wlr_xdg_toplevel_set_maximized(tl->xdg, true);
    }
}
static void tl_destroy(struct wl_listener *l, void *d) {
    struct toplevel *tl = wl_container_of(l, tl, destroy);
    wl_list_remove(&tl->map.link);
    wl_list_remove(&tl->unmap.link);
    wl_list_remove(&tl->commit.link);
    wl_list_remove(&tl->destroy.link);
    wl_list_remove(&tl->link);
    free(tl);
}

/*
 * Handler for the xdg-shell new_toplevel event (d is the new
 * struct wlr_xdg_toplevel).
 *
 * Allocates the per-toplevel tracking struct, creates a scene tree for the
 * xdg surface under the scene root (also stored in the xdg surface's data
 * pointer), hooks up the map/unmap/commit/destroy listeners, and inserts the
 * toplevel into the server's list. On allocation failure it logs and returns
 * without tracking the toplevel.
 */
static void new_toplevel(struct wl_listener *l, void *d) {
    struct server *srv = wl_container_of(l, srv, new_xdg_toplevel);
    struct wlr_xdg_toplevel *xdg_tl = d;

    struct toplevel *top = calloc(1, sizeof(*top));
    if (top == NULL) {
        wlr_log(WLR_ERROR, "Failed to allocate toplevel");
        return;
    }

    top->server = srv;
    top->xdg = xdg_tl;
    top->scene_tree = wlr_scene_xdg_surface_create(&srv->scene->tree, xdg_tl->base);
    xdg_tl->base->data = top->scene_tree;

    /* Surface-level events. */
    top->map.notify = tl_map;
    wl_signal_add(&xdg_tl->base->surface->events.map, &top->map);

    top->unmap.notify = tl_unmap;
    wl_signal_add(&xdg_tl->base->surface->events.unmap, &top->unmap);

    top->commit.notify = tl_commit;
    wl_signal_add(&xdg_tl->base->surface->events.commit, &top->commit);

    /* Toplevel-level event. */
    top->destroy.notify = tl_destroy;
    wl_signal_add(&xdg_tl->events.destroy, &top->destroy);

    wl_list_insert(&srv->toplevels, &top->link);
}

static void output_frame(struct wl_listener *listener, void *data) {
    struct server *s = wl_container_of(listener, s, output_frame);
    
    s->global_frame_count++;
    if (!s || !s->scene_output || !s->framebuf || !s->output) {
        if (s->global_frame_count <= 5)
            wlr_log(WLR_ERROR, "output_frame: null pointer");
        return;
    }
    
    struct wlr_scene_output *so = s->scene_output;
    struct wlr_output_state state;
    wlr_output_state_init(&state);
    
    struct wlr_scene_output_state_options opts = {0};
    
    if (!wlr_scene_output_build_state(so, &state, &opts)) {
        wlr_output_state_finish(&state);
        /* Fill with gray on failure */
        for (int i = 0; i < s->width * s->height; i++)
            s->framebuf[i] = 0xFF303030;
        s->force_full_frame = 1;
        if (s->global_frame_count <= 5)
            wlr_log(WLR_INFO, "output_frame %d: build_state failed, sending gray", s->global_frame_count);
        send_frame(s);
        return;
    }
    
    /* Extract damage */
    s->has_damage = 0;
    if (state.committed & WLR_OUTPUT_STATE_DAMAGE) {
        pixman_box32_t *ext = pixman_region32_extents(&state.damage);
        if (ext->x2 > ext->x1 && ext->y2 > ext->y1) {
            s->damage_x1 = ext->x1;
            s->damage_y1 = ext->y1;
            s->damage_x2 = ext->x2;
            s->damage_y2 = ext->y2;
            s->has_damage = 1;
        }
    }
    
    struct wlr_buffer *buffer = state.buffer;
    
    if (s->global_frame_count <= 5)
        wlr_log(WLR_INFO, "output_frame %d: damage=%d force=%d buffer=%p", 
                s->global_frame_count, s->has_damage, s->force_full_frame, (void*)buffer);
    
    /* Skip if no damage and not forced */
    if (!s->force_full_frame && !s->has_damage) {
        wlr_output_commit_state(s->output, &state);
        wlr_output_state_finish(&state);
        struct timespec now;
        clock_gettime(CLOCK_MONOTONIC, &now);
        wlr_scene_output_send_frame_done(so, &now);
        return;
    }
    
    if (!buffer) {
        wlr_log(WLR_ERROR, "output_frame %d: no buffer", s->global_frame_count);
        wlr_output_commit_state(s->output, &state);
        wlr_output_state_finish(&state);
        struct timespec now;
        clock_gettime(CLOCK_MONOTONIC, &now);
        wlr_scene_output_send_frame_done(so, &now);
        return;
    }
    
    /* Copy pixels from buffer */
    if (buffer->width == s->width && buffer->height == s->height) {
        void *data_ptr = NULL;
        uint32_t fmt = 0;
        size_t stride = 0;
        
        if (wlr_buffer_begin_data_ptr_access(buffer, WLR_BUFFER_DATA_PTR_ACCESS_READ,
                                              &data_ptr, &fmt, &stride)) {
            if (s->global_frame_count <= 10)
                wlr_log(WLR_INFO, "output_frame %d: stride=%zu width=%d (expected %d) fmt=0x%x", 
                        s->global_frame_count, stride, buffer->width, buffer->width * 4, fmt);
            
            /* Always copy full buffer */
            int nonzero = 0;
            for (int y = 0; y < buffer->height; y++) {
                uint8_t *src = (uint8_t*)data_ptr + y * stride;
                uint32_t *dst = &s->framebuf[y * s->width];
                memcpy(dst, src, s->width * 4);
                if (!nonzero) {
                    for (int x = 0; x < s->width; x++) {
                        if (dst[x] != 0) { nonzero = 1; break; }
                    }
                }
            }
            if (s->global_frame_count <= 10)
                wlr_log(WLR_INFO, "output_frame %d: copied, nonzero=%d", s->global_frame_count, nonzero);
            wlr_buffer_end_data_ptr_access(buffer);
        } else {
            wlr_log(WLR_ERROR, "output_frame %d: buffer access failed, skipping", s->global_frame_count);
            wlr_output_commit_state(s->output, &state);
            wlr_output_state_finish(&state);
            struct timespec now;
            clock_gettime(CLOCK_MONOTONIC, &now);
            wlr_scene_output_send_frame_done(so, &now);
            return;
        }
    } else {
        wlr_log(WLR_ERROR, "output_frame %d: bad buffer (buf=%p size=%dx%d expected %dx%d), skipping",
                s->global_frame_count, (void*)buffer, 
                buffer->width, buffer->height,
                s->width, s->height);
        wlr_output_commit_state(s->output, &state);
        wlr_output_state_finish(&state);
        struct timespec now;
        clock_gettime(CLOCK_MONOTONIC, &now);
        wlr_scene_output_send_frame_done(so, &now);
        return;
    }
    
    wlr_output_commit_state(s->output, &state);
    wlr_output_state_finish(&state);
    
    struct timespec now;
    clock_gettime(CLOCK_MONOTONIC, &now);
    wlr_scene_output_send_frame_done(so, &now);
    
    send_frame(s);
}

/*
 * Destroy handler for the output: detach the frame and destroy listeners
 * and clear the server's references to the output.
 *
 * output_frame() already treats a NULL output or scene_output as "nothing
 * to do", so clearing the pointers here keeps later code from touching an
 * output that is going away. The data argument is unused.
 */
static void output_destroy(struct wl_listener *listener, void *data) {
    struct server *s = wl_container_of(listener, s, output_destroy);
    wl_list_remove(&s->output_frame.link);
    wl_list_remove(&s->output_destroy.link);

    /* NOTE(review): the scene output is assumed to be torn down together
     * with its wlr_output; either way these pointers must not be used again. */
    s->scene_output = NULL;
    s->output = NULL;
}

static void new_output(struct wl_listener *l, void *d) {
    struct server *s = wl_container_of(l, s, new_output);
    struct wlr_output *out = d;
    
    wlr_output_init_render(out, s->allocator, s->renderer);
    
    struct wlr_output_state state;
    wlr_output_state_init(&state);
    wlr_output_state_set_enabled(&state, true);
    wlr_output_state_set_custom_mode(&state, s->width, s->height, 60000);
    wlr_output_commit_state(out, &state);
    wlr_output_state_finish(&state);
    
    wlr_output_layout_add_auto(s->output_layout, out);
    s->output = out;
    s->scene_output = wlr_scene_output_create(s->scene, out);
    
    s->output_frame.notify = output_frame;
    wl_signal_add(&out->events.frame, &s->output_frame);
    s->output_destroy.notify = output_destroy;
    wl_signal_add(&out->events.destroy, &s->output_destroy);
    
    wlr_log(WLR_INFO, "Output ready: %dx%d", s->width, s->height);
}

/*
 * Handler for the backend's new_input event (d is the new
 * struct wlr_input_device). Pointer devices are attached to the server's
 * cursor; every other device type is ignored.
 */
static void new_input(struct wl_listener *l, void *d) {
    struct server *srv = wl_container_of(l, srv, new_input);
    struct wlr_input_device *device = d;

    if (device->type != WLR_INPUT_DEVICE_POINTER) {
        return;
    }
    wlr_cursor_attach_input_device(srv->cursor, device);
}

/*
 * Release the resources held directly in the server state: the tile dedup
 * groups, every heap buffer, and the UDP socket (when its descriptor is
 * non-negative). Used on both the error paths and the normal exit of main().
 */
static void server_cleanup(struct server *s) {
    free_tile_groups(&s->dedup);

    void *heap_bufs[] = {
        s->framebuf,
        s->prev_framebuf,
        s->tilebuf,
        s->compbuf,
        s->sendbuf,
        s->pending_tiles,
    };
    for (size_t i = 0; i < sizeof(heap_bufs) / sizeof(heap_bufs[0]); i++) {
        free(heap_bufs[i]);  /* free(NULL) is a no-op */
    }

    if (!(s->udp_fd < 0)) {
        close(s->udp_fd);
    }
}

/*
 * Entry point. Usage: <program> <plan9-ip> [port]
 *
 * Steps:
 *   1. Parse the peer address and optional UDP port (default 5556).
 *   2. Open the UDP socket and ask the peer for its window size, falling
 *      back to 1024x768 when no usable answer arrives.
 *   3. Allocate the frame, tile, compression and send buffers.
 *   4. Bring up a headless wlroots compositor (the pixman renderer is forced
 *      via WLR_RENDERER so buffers can be read back) with xdg-shell, a seat
 *      and a virtual keyboard.
 *   5. Register the UDP socket and the batch timer with the event loop and
 *      run the display until it exits.
 *
 * Returns 0 after a normal run and 1 on any setup failure.
 */
int main(int argc, char *argv[]) {
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <plan9-ip> [port]\n", argv[0]);
        return 1;
    }

    /* Parse the optional port strictly so that garbage or out-of-range
     * input is rejected instead of silently becoming some other port. */
    int port = 5556;
    if (argc > 2) {
        char *end = NULL;
        errno = 0;
        long parsed = strtol(argv[2], &end, 10);
        if (errno != 0 || end == argv[2] || *end != '\0' ||
            parsed < 1 || parsed > 65535) {
            fprintf(stderr, "Invalid port: %s\n", argv[2]);
            return 1;
        }
        port = (int)parsed;
    }

    signal(SIGPIPE, SIG_IGN);
    wlr_log_init(WLR_INFO, NULL);

    struct server s = {0};
    wl_list_init(&s.toplevels);

    /* UDP socket */
    s.udp_fd = socket(AF_INET, SOCK_DGRAM, 0);
    if (s.udp_fd < 0) { perror("socket"); return 1; }

    s.p9_addr.sin_family = AF_INET;
    s.p9_addr.sin_port = htons((uint16_t)port);
    if (inet_pton(AF_INET, argv[1], &s.p9_addr.sin_addr) != 1) {
        fprintf(stderr, "Invalid address: %s\n", argv[1]);
        close(s.udp_fd);
        return 1;
    }

    wlr_log(WLR_INFO, "Connecting to %s:%d", argv[1], port);

    /* Get size from Plan 9 */
    s.width = 1024; s.height = 768;
    if (get_size(s.udp_fd, &s.p9_addr, &s.width, &s.height) < 0) {
        wlr_log(WLR_ERROR, "Failed to get size, using %dx%d", s.width, s.height);
    }
    if (s.width <= 0 || s.height <= 0) {
        /* Never size the buffers or the output from an empty window. */
        wlr_log(WLR_ERROR, "Invalid size %dx%d, using 1024x768", s.width, s.height);
        s.width = 1024; s.height = 768;
    }

    s.tiles_x = (s.width + TILE_SIZE - 1) / TILE_SIZE;
    s.tiles_y = (s.height + TILE_SIZE - 1) / TILE_SIZE;

    /* Do the size arithmetic in size_t so large dimensions cannot overflow int. */
    size_t npixels = (size_t)s.width * (size_t)s.height;
    size_t total_tiles = (size_t)s.tiles_x * (size_t)s.tiles_y;

    s.framebuf = calloc(npixels, 4);
    s.prev_framebuf = calloc(npixels, 4);
    s.tilebuf = malloc(TILE_SIZE * TILE_SIZE * 4);
    s.compbuf_size = TILE_SIZE * TILE_SIZE * 4 * 2;
    s.compbuf = malloc(s.compbuf_size);
    s.sendbuf = malloc(P9WL_MTU + 64);
    s.pending_tiles = malloc(total_tiles * sizeof(int));

    /* Check all allocations */
    if (!s.framebuf || !s.prev_framebuf || !s.tilebuf || 
        !s.compbuf || !s.sendbuf || !s.pending_tiles) {
        wlr_log(WLR_ERROR, "Memory allocation failed");
        server_cleanup(&s);
        return 1;
    }

    s.num_pending = 0;
    s.batch_active = 0;
    s.force_full_frame = 1;
    s.last_full_frame_ms = now_ms();
    s.global_frame_count = 0;

    /* Wayland - force Pixman renderer for buffer access */
    setenv("WLR_RENDERER", "pixman", 1);
    setenv("WLR_SCENE_DISABLE_DIRECT_SCANOUT", "1", 1);

    s.display = wl_display_create();
    if (!s.display) { wlr_log(WLR_ERROR, "Display failed"); server_cleanup(&s); return 1; }

    s.backend = wlr_headless_backend_create(wl_display_get_event_loop(s.display));
    if (!s.backend) { wlr_log(WLR_ERROR, "Backend failed"); server_cleanup(&s); return 1; }

    s.renderer = wlr_renderer_autocreate(s.backend);
    if (!s.renderer) { wlr_log(WLR_ERROR, "Renderer failed"); server_cleanup(&s); return 1; }
    wlr_renderer_init_wl_display(s.renderer, s.display);

    s.allocator = wlr_allocator_autocreate(s.backend, s.renderer);
    if (!s.allocator) { wlr_log(WLR_ERROR, "Allocator failed"); server_cleanup(&s); return 1; }

    wlr_compositor_create(s.display, 5, s.renderer);
    wlr_subcompositor_create(s.display);
    wlr_data_device_manager_create(s.display);

    s.output_layout = wlr_output_layout_create(s.display);
    s.scene = wlr_scene_create();
    wlr_scene_attach_output_layout(s.scene, s.output_layout);

    s.xdg_shell = wlr_xdg_shell_create(s.display, 3);
    s.new_xdg_toplevel.notify = new_toplevel;
    wl_signal_add(&s.xdg_shell->events.new_toplevel, &s.new_xdg_toplevel);

    s.cursor = wlr_cursor_create();
    wlr_cursor_attach_output_layout(s.cursor, s.output_layout);
    s.cursor_mgr = wlr_xcursor_manager_create(NULL, 24);

    s.seat = wlr_seat_create(s.display, "seat0");
    wlr_seat_set_capabilities(s.seat, WL_SEAT_CAPABILITY_POINTER | WL_SEAT_CAPABILITY_KEYBOARD);

    /* Virtual keyboard using the default xkb keymap */
    wlr_keyboard_init(&s.virtual_kb, NULL, "virtual-keyboard");
    struct xkb_context *ctx = xkb_context_new(XKB_CONTEXT_NO_FLAGS);
    struct xkb_keymap *km = ctx
        ? xkb_keymap_new_from_names(ctx, NULL, XKB_KEYMAP_COMPILE_NO_FLAGS)
        : NULL;
    if (!km) {
        wlr_log(WLR_ERROR, "Keymap failed");
        xkb_context_unref(ctx);
        server_cleanup(&s);
        return 1;
    }
    wlr_keyboard_set_keymap(&s.virtual_kb, km);
    xkb_keymap_unref(km);
    xkb_context_unref(ctx);
    wlr_seat_set_keyboard(s.seat, &s.virtual_kb);

    s.new_output.notify = new_output;
    wl_signal_add(&s.backend->events.new_output, &s.new_output);
    s.new_input.notify = new_input;
    wl_signal_add(&s.backend->events.new_input, &s.new_input);

    wlr_headless_add_output(s.backend, s.width, s.height);

    const char *sock = wl_display_add_socket_auto(s.display);
    if (!sock) { wlr_log(WLR_ERROR, "Socket failed"); server_cleanup(&s); return 1; }

    wlr_log(WLR_INFO, "WAYLAND_DISPLAY=%s (%dx%d)", sock, s.width, s.height);
    setenv("WAYLAND_DISPLAY", sock, 1);

    /* Without these two event sources no input would be received and no
     * batch timer would run, so treat a failed registration as fatal. */
    struct wl_event_loop *loop = wl_display_get_event_loop(s.display);
    if (!wl_event_loop_add_fd(loop, s.udp_fd, WL_EVENT_READABLE, handle_udp, &s)) {
        wlr_log(WLR_ERROR, "UDP event source failed");
        server_cleanup(&s);
        return 1;
    }

    s.batch_timer = wl_event_loop_add_timer(loop, batch_timer_cb, &s);
    if (!s.batch_timer) {
        wlr_log(WLR_ERROR, "Batch timer failed");
        server_cleanup(&s);
        return 1;
    }

    if (!wlr_backend_start(s.backend)) { wlr_log(WLR_ERROR, "Start failed"); server_cleanup(&s); return 1; }

    wlr_log(WLR_INFO, "Running");
    wl_display_run(s.display);

    wl_display_destroy(s.display);
    server_cleanup(&s);
    return 0;
}