Database Schema
-- Events Platform: initial unified schema
-- Target: Postgres (Supabase)
--
-- Design goals:
-- - One schema serves all 4 brands (HeyAustin, HeyCrestedButte, 6thStreet, LakeTravis)
-- via a `sites` table + `site_id` foreign keys, rather than 4 separate databases.
-- - Venues, events, rentals, and posts are the four core content types — confirmed
-- against live recon of the current WordPress sites (HeyAustin, LakeTravis,
-- CrestedButte all run Listify + WP Job Manager + a "rental" post type + a
-- Facebook Events importer + the Listar mobile-app REST API).
-- - AI-ingested events land in a staging table (`ingested_events`) for human
-- review before becoming real `events` rows — mirrors vee.py's matched /
-- missing_on_site / missing_on_venue output, but persisted instead of ad hoc JSON.
-- - Public app users (favorites/wishlist, reviews) are separate from admin_users,
-- confirmed required by the live Listar API's auth/register + wishlist/* routes.
-- - `search_documents` is a hybrid keyword+vector index feeding the AI search
-- feature, kept separate from content tables so re-embedding never touches them.
create extension if not exists "pgcrypto";
create extension if not exists "vector";
-- ---------------------------------------------------------------------------
-- Multi-tenant root: one row per brand/site
-- ---------------------------------------------------------------------------
create table sites (
id uuid primary key default gen_random_uuid(),
slug text unique not null, -- 'heyaustin', 'heycrestedbutte', '6thstreet', 'laketravis'
name text not null,
domain text unique not null, -- 'heyaustin.com'
timezone text not null default 'America/Chicago',
brand_config jsonb not null default '{}', -- theme colors, logo urls, social links, etc.
created_at timestamptz not null default now()
);
-- ---------------------------------------------------------------------------
-- Users & roles (admin/curation portal accounts, not public app accounts)
-- ---------------------------------------------------------------------------
create table admin_users (
id uuid primary key default gen_random_uuid(),
email text unique not null,
full_name text,
created_at timestamptz not null default now()
);
create table admin_user_site_roles (
admin_user_id uuid not null references admin_users(id) on delete cascade,
site_id uuid not null references sites(id) on delete cascade,
role text not null check (role in ('owner', 'editor', 'curator', 'venue_partner')),
primary key (admin_user_id, site_id)
);
-- ---------------------------------------------------------------------------
-- Shared taxonomy (categories/tags), scoped per site so brands can diverge
-- ---------------------------------------------------------------------------
create table categories (
id uuid primary key default gen_random_uuid(),
site_id uuid not null references sites(id) on delete cascade,
kind text not null check (kind in ('venue', 'event', 'post', 'rental', 'deal')),
slug text not null,
name text not null,
description text,
parent_id uuid references categories(id),
unique (site_id, kind, slug)
);
-- Regions are a separate hierarchy from categories (confirmed by the live sites'
-- job_listing_region taxonomy existing alongside job_listing_category) — e.g.
-- "North Shore" / "Marina District" as opposed to topical categories like
-- "Distilleries". Kept as its own table rather than overloading `categories`.
create table regions (
id uuid primary key default gen_random_uuid(),
site_id uuid not null references sites(id) on delete cascade,
slug text not null,
name text not null,
parent_id uuid references regions(id),
unique (site_id, slug)
);
-- ---------------------------------------------------------------------------
-- Media library
-- ---------------------------------------------------------------------------
create table media (
id uuid primary key default gen_random_uuid(),
site_id uuid references sites(id) on delete set null, -- null = shared across sites
storage_path text not null, -- Supabase Storage path or S3 key
alt_text text,
width int,
height int,
created_at timestamptz not null default now()
);
-- ---------------------------------------------------------------------------
-- Venue groups: links venue rows across sites that represent the same real
-- -world place (confirmed via recon: 51% of 6thStreet's venues are the same
-- physical venues as HeyAustin's, independently listed/edited on each site).
-- Rows stay separate per site (each keeps its own description/category/
-- editorial voice) — grouping exists only so the admin portal can surface
-- "this venue also exists on X" and so the ingestion pipeline scrapes each
-- real-world venue's calendar once, not once per site it's linked to.
-- A group table (rather than a raw self-referential FK on `venues`) avoids
-- "which row points to which" ambiguity and scales past 2 sites cleanly.
-- ---------------------------------------------------------------------------
create table venue_groups (
id uuid primary key default gen_random_uuid(),
notes text,
created_at timestamptz not null default now()
);
-- ---------------------------------------------------------------------------
-- Venues / partner listings (replaces Listify "listing" + WP Job Manager posts)
-- ---------------------------------------------------------------------------
create table venues (
id uuid primary key default gen_random_uuid(),
site_id uuid not null references sites(id) on delete cascade,
region_id uuid references regions(id),
venue_group_id uuid references venue_groups(id) on delete set null,
slug text not null,
name text not null,
description text,
address text,
lat double precision,
lng double precision,
website_url text, -- venue's own site, used by ingestion pipeline
calendar_url text, -- discovered/curated events-page URL, used by ingestion pipeline
phone text,
featured_media_id uuid references media(id),
is_featured boolean not null default false,
rating_avg numeric(3,2) not null default 0,
rating_count integer not null default 0,
status text not null default 'published' check (status in ('draft', 'published', 'archived')),
created_at timestamptz not null default now(),
updated_at timestamptz not null default now(),
unique (site_id, slug)
);
create table venue_categories (
venue_id uuid not null references venues(id) on delete cascade,
category_id uuid not null references categories(id) on delete cascade,
primary key (venue_id, category_id)
);
-- ---------------------------------------------------------------------------
-- Events (the highest-value, most time-consuming content type)
-- ---------------------------------------------------------------------------
create table events (
id uuid primary key default gen_random_uuid(),
site_id uuid not null references sites(id) on delete cascade,
venue_id uuid references venues(id) on delete set null,
slug text not null,
title text not null,
description text,
starts_at timestamptz not null,
ends_at timestamptz,
ticket_url text,
price_text text,
featured_media_id uuid references media(id),
is_featured boolean not null default false,
rating_avg numeric(3,2) not null default 0,
rating_count integer not null default 0,
status text not null default 'published' check (status in ('draft', 'published', 'archived')),
-- 'facebook_import' reflects the Facebook Events importer already running on all
-- 3 sites checked (1,709 of LakeTravis's events are facebook-sourced) — real
-- existing automation, distinct from the new AI ingestion pipeline (ai_ingested).
-- 'user_submitted' covers the public "Submit an Event" flow (see homepage
-- design) — lands in the same ingested_events-style review queue as
-- scraped events, just with a human source instead of a scraper.
source text not null default 'manual' check (source in ('manual', 'ai_ingested', 'facebook_import', 'user_submitted')),
ingested_event_id uuid, -- back-reference if this row originated from ingestion (fk added below)
created_at timestamptz not null default now(),
updated_at timestamptz not null default now(),
unique (site_id, slug)
);
create table event_categories (
event_id uuid not null references events(id) on delete cascade,
category_id uuid not null references categories(id) on delete cascade,
primary key (event_id, category_id)
);
-- ---------------------------------------------------------------------------
-- Artists/performers — a 5th content type, surfaced by the HeyAustin homepage
-- design (top-nav "ARTISTS", event cards crediting performers like "Shakey
-- Graves"). Distinct from venues: an artist plays at many venues over time.
-- ---------------------------------------------------------------------------
create table artists (
id uuid primary key default gen_random_uuid(),
site_id uuid not null references sites(id) on delete cascade,
slug text not null,
name text not null,
bio text,
featured_media_id uuid references media(id),
social_links jsonb not null default '{}', -- instagram, spotify, website, etc.
status text not null default 'published' check (status in ('draft', 'published', 'archived')),
created_at timestamptz not null default now(),
updated_at timestamptz not null default now(),
unique (site_id, slug)
);
create table event_artists (
event_id uuid not null references events(id) on delete cascade,
artist_id uuid not null references artists(id) on delete cascade,
primary key (event_id, artist_id)
);
-- ---------------------------------------------------------------------------
-- Deals — a 6th content type, also surfaced by the homepage design
-- ("Half Price Cocktails, Loro, 4:00-6:00 PM"). Deliberately separate from
-- `events`: a deal is a recurring offer on a weekly schedule, not a single
-- start/end timestamp, so it doesn't fit the events model.
-- ---------------------------------------------------------------------------
create table deals (
id uuid primary key default gen_random_uuid(),
site_id uuid not null references sites(id) on delete cascade,
venue_id uuid references venues(id) on delete set null,
slug text not null,
title text not null,
description text,
price_text text,
days_of_week smallint[] not null default '{}', -- 0=Sunday..6=Saturday; empty = every day
start_time time,
end_time time,
valid_from date,
valid_until date,
featured_media_id uuid references media(id),
is_featured boolean not null default false,
status text not null default 'published' check (status in ('draft', 'published', 'archived')),
created_at timestamptz not null default now(),
updated_at timestamptz not null default now(),
unique (site_id, slug)
);
create table deal_categories (
deal_id uuid not null references deals(id) on delete cascade,
category_id uuid not null references categories(id) on delete cascade,
primary key (deal_id, category_id)
);
-- ---------------------------------------------------------------------------
-- Newsletter signup (homepage "Never Miss Out" capture). Just the capture
-- lives here — actual sending is a third-party service (Resend/Mailchimp/
-- etc.), not something to build from scratch.
-- ---------------------------------------------------------------------------
create table newsletter_subscribers (
id uuid primary key default gen_random_uuid(),
site_id uuid not null references sites(id) on delete cascade,
email text not null,
subscribed_at timestamptz not null default now(),
unsubscribed_at timestamptz,
unique (site_id, email)
);
-- ---------------------------------------------------------------------------
-- Blog posts
-- ---------------------------------------------------------------------------
create table posts (
id uuid primary key default gen_random_uuid(),
site_id uuid not null references sites(id) on delete cascade,
author_id uuid references admin_users(id),
slug text not null,
title text not null,
body text not null, -- markdown or rich text JSON
excerpt text,
featured_media_id uuid references media(id),
status text not null default 'draft' check (status in ('draft', 'published', 'archived')),
published_at timestamptz,
seo_title text, -- AI-generated SEO fields, human-editable
seo_description text,
created_at timestamptz not null default now(),
updated_at timestamptz not null default now(),
unique (site_id, slug)
);
create table post_categories (
post_id uuid not null references posts(id) on delete cascade,
category_id uuid not null references categories(id) on delete cascade,
primary key (post_id, category_id)
);
-- ---------------------------------------------------------------------------
-- Rentals (vacation/property rentals) — confirmed as a 4th content type via
-- the live `rental` post type (e.g. laketravis.com/rental/lakefront-home-...)
-- ---------------------------------------------------------------------------
create table rentals (
id uuid primary key default gen_random_uuid(),
site_id uuid not null references sites(id) on delete cascade,
region_id uuid references regions(id),
slug text not null,
name text not null,
description text,
address text,
lat double precision,
lng double precision,
booking_url text, -- external booking link (Airbnb/VRBO/direct)
price_text text,
featured_media_id uuid references media(id),
is_featured boolean not null default false,
rating_avg numeric(3,2) not null default 0,
rating_count integer not null default 0,
status text not null default 'published' check (status in ('draft', 'published', 'archived')),
created_at timestamptz not null default now(),
updated_at timestamptz not null default now(),
unique (site_id, slug)
);
-- ---------------------------------------------------------------------------
-- Public app users — distinct from admin_users. Required by the live Listar
-- API's auth/register + wishlist/* routes; the mobile apps already have real
-- end users with accounts, not just anonymous browsing.
-- ---------------------------------------------------------------------------
create table app_users (
id uuid primary key default gen_random_uuid(),
email text unique not null,
full_name text,
created_at timestamptz not null default now()
);
-- Polymorphic favorites ("wishlist" in the current Listar API) across all
-- content types rather than 3 separate join tables.
create table favorites (
id uuid primary key default gen_random_uuid(),
app_user_id uuid not null references app_users(id) on delete cascade,
target_type text not null check (target_type in ('venue', 'event', 'rental', 'deal', 'artist')),
target_id uuid not null,
created_at timestamptz not null default now(),
unique (app_user_id, target_type, target_id)
);
-- Reviews feed the rating_avg/rating_count aggregates on venues/events/rentals
-- (confirmed live via schema.org AggregateRating markup + rating_avg/rating_count
-- fields already present in the Listar API's event/place responses).
create table reviews (
id uuid primary key default gen_random_uuid(),
app_user_id uuid not null references app_users(id) on delete cascade,
target_type text not null check (target_type in ('venue', 'event', 'rental', 'deal', 'artist')),
target_id uuid not null,
rating smallint not null check (rating between 1 and 5),
body text,
status text not null default 'published' check (status in ('published', 'flagged', 'removed')),
created_at timestamptz not null default now()
);
-- ---------------------------------------------------------------------------
-- AI-driven search: hybrid keyword (tsvector) + semantic (pgvector) index,
-- kept separate from content tables so re-embedding/re-indexing never requires
-- touching venues/events/posts/rentals directly. Feeds the "understands
-- complete tasks" natural-language search requirement.
-- ---------------------------------------------------------------------------
create table search_documents (
id uuid primary key default gen_random_uuid(),
site_id uuid not null references sites(id) on delete cascade,
target_type text not null check (target_type in ('venue', 'event', 'post', 'rental', 'deal', 'artist')),
target_id uuid not null,
text_content text not null, -- flattened title+description+category+region for embedding/search
tsv tsvector generated always as (to_tsvector('english', text_content)) stored,
embedding vector(1536), -- text-embedding-3-small dimension; adjust if model changes
updated_at timestamptz not null default now(),
unique (target_type, target_id)
);
create index idx_search_documents_tsv on search_documents using gin (tsv);
create index idx_search_documents_embedding on search_documents using hnsw (embedding vector_cosine_ops);
-- ---------------------------------------------------------------------------
-- AI ingestion pipeline: staging area, mirrors vee.py's diff output
-- ---------------------------------------------------------------------------
create table ingestion_runs (
id uuid primary key default gen_random_uuid(),
site_id uuid not null references sites(id) on delete cascade,
venue_id uuid not null references venues(id) on delete cascade,
status text not null check (status in ('running', 'success', 'error')),
error_message text,
started_at timestamptz not null default now(),
finished_at timestamptz
);
create table ingested_events (
id uuid primary key default gen_random_uuid(),
ingestion_run_id uuid not null references ingestion_runs(id) on delete cascade,
venue_id uuid not null references venues(id) on delete cascade,
raw_title text not null,
raw_date_text text,
parsed_starts_at timestamptz,
parsed_ends_at timestamptz,
source_url text not null,
match_status text not null check (match_status in ('new', 'matched_existing', 'needs_review')),
matched_event_id uuid references events(id),
review_status text not null default 'pending' check (review_status in ('pending', 'approved', 'rejected')),
reviewed_by uuid references admin_users(id),
reviewed_at timestamptz,
created_at timestamptz not null default now()
);
alter table events
add constraint events_ingested_event_id_fkey
foreign key (ingested_event_id) references ingested_events(id) on delete set null;
-- ---------------------------------------------------------------------------
-- Indexes for the hot paths: upcoming events per site, venue lookups
-- ---------------------------------------------------------------------------
create index idx_events_site_starts_at on events (site_id, starts_at) where status = 'published';
create index idx_events_site_featured on events (site_id, starts_at) where status = 'published' and is_featured;
create index idx_venues_site_status on venues (site_id, status);
create index idx_venues_geo on venues (lat, lng);
create index idx_venues_group on venues (venue_group_id) where venue_group_id is not null;
create index idx_rentals_site_status on rentals (site_id, status);
create index idx_posts_site_published on posts (site_id, published_at) where status = 'published';
create index idx_ingested_events_review_status on ingested_events (review_status);
create index idx_favorites_app_user on favorites (app_user_id);
create index idx_reviews_target on reviews (target_type, target_id);
create index idx_deals_site_status on deals (site_id, status);
create index idx_artists_site_status on artists (site_id, status);