Mesh Data Model and Schema

Entity-Relationship Diagram

┌─────────────┐       ┌─────────────┐       ┌─────────────┐
│   nodes     │       │  node_tags  │       │    tags      │
├─────────────┤       ├─────────────┤       ├─────────────┤
│ id (PK)     │──────<│ node_id(FK) │>──────│ id (PK)     │
│ type        │       │ tag_id (FK) │       │ name (UQ)   │
│ title       │       └─────────────┘       └─────────────┘
│ content     │
│ summary     │       ┌─────────────────┐
│ source_url  │       │     edges       │
│ image_key   │       ├─────────────────┤
│ embedding   │──────<│ source_id (FK)  │
│ version     │       │ target_id (FK)  │>──── nodes.id
│ created_at  │       │ rel_type        │
│ updated_at  │       │ weight          │
│ updated_at  │       │ created_at      │
└─────────────┘       └─────────────────┘
      │               
      │               
      │               ┌─────────────────┐
      └──────────────<│ review_schedule │
                      ├─────────────────┤
                      │ node_id (PK,FK) │
                      │ stability       │
                      │ difficulty      │
                      │ due_date        │
                      │ last_review     │
                      │ reps            │
                      │ lapses          │
                      └─────────────────┘

┌─────────────────┐
│      jobs       │
├─────────────────┤
│ id (PK)         │
│ type            │
│ payload (JSONB) │
│ status          │
│ claimed_at      │
│ completed_at    │
│ error           │
│ created_at      │
└─────────────────┘

SQL Schema

-- Enable required extensions
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
CREATE EXTENSION IF NOT EXISTS "vector";        -- pgvector
CREATE EXTENSION IF NOT EXISTS "pg_trgm";       -- fuzzy text search

-- Core node representing any knowledge entity
CREATE TABLE nodes (
    id          UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    type        TEXT NOT NULL CHECK (type IN (
                    'article', 'book', 'hobby', 'thought',
                    'journal', 'wildcard', 'image'
                )),
    title       TEXT NOT NULL,
    content     TEXT,
    summary     TEXT,
    source_url  TEXT,
    image_key   TEXT,
    embedding   vector(768),
    version     INTEGER NOT NULL DEFAULT 1,
    created_at  TIMESTAMPTZ NOT NULL DEFAULT now(),
    updated_at  TIMESTAMPTZ NOT NULL DEFAULT now()
);

-- Tags / concepts extracted by NLP
CREATE TABLE tags (
    id          UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    name        TEXT NOT NULL UNIQUE
);

-- Many-to-many: nodes <-> tags
CREATE TABLE node_tags (
    node_id     UUID NOT NULL REFERENCES nodes(id) ON DELETE CASCADE,
    tag_id      UUID NOT NULL REFERENCES tags(id) ON DELETE CASCADE,
    confidence  REAL DEFAULT 1.0,
    PRIMARY KEY (node_id, tag_id)
);

-- Edges between nodes
CREATE TABLE edges (
    id          UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    source_id   UUID NOT NULL REFERENCES nodes(id) ON DELETE CASCADE,
    target_id   UUID NOT NULL REFERENCES nodes(id) ON DELETE CASCADE,
    rel_type    TEXT NOT NULL CHECK (rel_type IN (
                    'tag_shared', 'manual', 'semantic', 'bridge', 'wildcard'
                )),
    weight      REAL NOT NULL DEFAULT 1.0,
    created_at  TIMESTAMPTZ NOT NULL DEFAULT now(),
    UNIQUE(source_id, target_id, rel_type)
);

-- FSRS spaced repetition scheduling
CREATE TABLE review_schedule (
    node_id     UUID PRIMARY KEY REFERENCES nodes(id) ON DELETE CASCADE,
    stability   REAL NOT NULL DEFAULT 0.4,
    difficulty  REAL NOT NULL DEFAULT 5.0,
    due_date    TIMESTAMPTZ NOT NULL DEFAULT now(),
    last_review TIMESTAMPTZ,
    reps        INTEGER NOT NULL DEFAULT 0,
    lapses      INTEGER NOT NULL DEFAULT 0,
    state       TEXT NOT NULL DEFAULT 'new' CHECK (state IN (
                    'new', 'learning', 'review', 'relearning'
                ))
);

-- Background job queue
CREATE TABLE jobs (
    id              UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    type            TEXT NOT NULL CHECK (type IN (
                        'process_url', 'process_text', 'process_image',
                        'generate_embedding', 'build_edges',
                        'discovery_run', 'wildcard_inject',
                        'reembed_batch'
                    )),
    payload         JSONB NOT NULL,
    status          TEXT NOT NULL DEFAULT 'pending' CHECK (status IN (
                        'pending', 'running', 'done', 'failed', 'dead'
                    )),
    attempts        INTEGER NOT NULL DEFAULT 0,
    max_attempts    INTEGER NOT NULL DEFAULT 3,
    claimed_at      TIMESTAMPTZ,
    completed_at    TIMESTAMPTZ,
    scheduled_for   TIMESTAMPTZ DEFAULT now(),
    error           TEXT,
    created_at      TIMESTAMPTZ NOT NULL DEFAULT now()
);

-- Discovery run history
CREATE TABLE discovery_runs (
    id              UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    run_type        TEXT NOT NULL CHECK (run_type IN (
                        'cluster_analysis', 'bridge_detection', 'wildcard_injection'
                    )),
    results         JSONB NOT NULL,
    nodes_affected  INTEGER NOT NULL DEFAULT 0,
    edges_created   INTEGER NOT NULL DEFAULT 0,
    executed_at     TIMESTAMPTZ NOT NULL DEFAULT now()
);

Key Query Patterns

BFS Graph Traversal

WITH RECURSIVE graph_walk AS (
    SELECT target_id AS node_id, 1 AS depth, ARRAY[source_id, target_id] AS path
    FROM edges WHERE source_id = $1
    UNION ALL
    SELECT e.target_id, gw.depth + 1, gw.path || e.target_id
    FROM edges e
    JOIN graph_walk gw ON e.source_id = gw.node_id
    WHERE gw.depth < $2 AND NOT (e.target_id = ANY(gw.path))
)
SELECT DISTINCT ON (n.id) n.*, gw.depth
FROM graph_walk gw
JOIN nodes n ON n.id = gw.node_id
ORDER BY n.id, gw.depth;

Semantic Similarity Search

SELECT id, title, type, summary, 1 - (embedding <=> $1) AS similarity
FROM nodes WHERE embedding IS NOT NULL AND id != $2
ORDER BY embedding <=> $1 LIMIT $3;