From a1f33e2046ee2d79d92f46167bbc2aefc0edd64b Mon Sep 17 00:00:00 2001
From: RamonCalvo
Date: Sat, 28 Mar 2026 18:37:29 -0600
Subject: [PATCH] feat: add LLM-based bookmark categorization and README

Cron-triggered endpoint that uses Claude to auto-categorize
uncategorized bookmarks. Includes full project documentation.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 README.md                         | 111 ++++++++++++++++++++++++++++++
 backend/app/categorizer.py        |  47 +++++++++++++
 backend/app/routers/categorize.py |  13 ++++
 3 files changed, 171 insertions(+)
 create mode 100644 README.md
 create mode 100644 backend/app/categorizer.py
 create mode 100644 backend/app/routers/categorize.py

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..3e12d22
--- /dev/null
+++ b/README.md
@@ -0,0 +1,111 @@
+# favs-my
+
+API de bookmarks personales con categorización automática vía LLM.
+
+## Stack
+
+- **API:** FastAPI (Python 3.12)
+- **DB:** PostgreSQL 16
+- **LLM:** Claude (Haiku por defecto)
+- **Infra:** Docker Compose
+
+## Setup
+
+```bash
+cp .env.example .env
+# editar .env con tu ANTHROPIC_API_KEY
+docker compose up --build
+```
+
+La API queda en `http://localhost:8000`. La DB en el puerto `5433`.
+
+## Uso
+
+### Crear bookmark
+
+```bash
+curl -X POST http://localhost:8000/api/bookmarks \
+  -H "Content-Type: application/json" \
+  -d '{"title":"FastAPI docs","link":"https://fastapi.tiangolo.com"}'
+```
+
+### Listar todos
+
+```bash
+curl http://localhost:8000/api/bookmarks
+```
+
+### Filtrar por categoría
+
+```bash
+curl "http://localhost:8000/api/bookmarks?category=python"
+```
+
+### Obtener uno
+
+```bash
+curl http://localhost:8000/api/bookmarks/{id}
+```
+
+### Actualizar
+
+```bash
+curl -X PUT http://localhost:8000/api/bookmarks/{id} \
+  -H "Content-Type: application/json" \
+  -d '{"title":"Nuevo titulo"}'
+```
+
+### Eliminar
+
+```bash
+curl -X DELETE http://localhost:8000/api/bookmarks/{id}
+```
+
+### Categorizar pendientes (LLM)
+
+```bash
+curl -X POST http://localhost:8000/api/categorize
+```
+
+Toma los bookmarks sin categoría (`category: null`), los envía a Claude y asigna categorías automáticamente.
+
+## Cron
+
+Para categorizar automáticamente cada 30 minutos:
+
+```bash
+crontab -e
+```
+
+```
+*/30 * * * * curl -s -X POST http://localhost:8000/api/categorize
+```
+
+## Variables de entorno
+
+| Variable | Default | Descripción |
+|---|---|---|
+| `DATABASE_URL` | `postgresql+asyncpg://favs:favs@favs-db:5432/favs` | Conexión a PostgreSQL |
+| `ANTHROPIC_API_KEY` | — | API key de Anthropic (requerida para categorizar) |
+| `CATEGORIZE_MODEL` | `claude-haiku-4-5-20251001` | Modelo a usar para categorización |
+
+## Estructura
+
+```
+├── docker-compose.yml
+├── .env.example
+└── backend/
+    ├── Dockerfile
+    ├── requirements.txt
+    └── app/
+        ├── main.py          # Entrypoint, lifespan, routers
+        ├── config.py        # Settings via env vars
+        ├── database.py      # Engine y sesión async
+        ├── models.py        # Modelo Bookmark (SQLAlchemy)
+        ├── schemas.py       # Pydantic schemas
+        ├── categorizer.py   # Lógica de categorización con LLM
+        └── routers/
+            ├── bookmarks.py   # CRUD /api/bookmarks
+            ├── categorize.py  # POST /api/categorize
+            └── health.py      # GET /api/health
+```
diff --git a/backend/app/categorizer.py b/backend/app/categorizer.py
new file mode 100644
index 0000000..1ff6298
--- /dev/null
+++ b/backend/app/categorizer.py
@@ -0,0 +1,113 @@
+"""LLM-backed categorization of bookmarks that have no category yet."""
+
+import json
+import uuid as _uuid
+
+import anthropic
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.config import settings
+from app.models import Bookmark
+
+# Maximum number of uncategorized bookmarks sent to the model per run.
+BATCH_SIZE = 50
+# Output budget for one batch. Every returned entry repeats a 36-character
+# UUID, so a full batch can overrun 1024 tokens and come back as truncated,
+# unparseable JSON.
+MAX_OUTPUT_TOKENS = 4096
+
+SYSTEM_PROMPT = """You categorize bookmarks. Given a list of bookmarks (title + url),
+assign each one a short category label (1-2 words, lowercase, e.g. "python", "devops", "design", "news", "ai/ml").
+
+Respond with a JSON array of objects: [{"id": "...", "category": "..."}]
+Only return the JSON, nothing else."""
+
+
+def _parse_categories(text: str) -> dict[_uuid.UUID, str]:
+    """Turn the model's reply into a ``{bookmark id: category}`` mapping.
+
+    A Markdown code fence around the JSON is tolerated. Entries that are not
+    objects, carry an unparseable ``id``, or lack a non-empty string
+    ``category`` are skipped so one bad entry cannot discard the whole batch.
+
+    Raises:
+        ValueError: if the reply is not valid JSON (``json.JSONDecodeError``
+            is a ``ValueError``) or is not a JSON array.
+    """
+    payload = text.strip()
+    if payload.startswith("```"):
+        # Drop the opening fence line (``` or ```json) and the closing fence.
+        payload = payload.partition("\n")[2].rsplit("```", 1)[0]
+    entries = json.loads(payload)
+    if not isinstance(entries, list):
+        raise ValueError("expected a JSON array of {id, category} objects")
+
+    parsed: dict[_uuid.UUID, str] = {}
+    for entry in entries:
+        if not isinstance(entry, dict):
+            continue
+        category = entry.get("category")
+        if not isinstance(category, str) or not category.strip():
+            continue
+        try:
+            bookmark_id = _uuid.UUID(str(entry.get("id")))
+        except ValueError:
+            continue
+        # The prompt asks for lowercase labels; enforce it so filtering by
+        # category stays consistent.
+        parsed[bookmark_id] = category.strip().lower()
+    return parsed
+
+
+async def categorize_pending(db: AsyncSession) -> int:
+    """Ask the LLM to categorize up to ``BATCH_SIZE`` uncategorized bookmarks.
+
+    Args:
+        db: Session used to load the pending bookmarks and commit the updates.
+
+    Returns:
+        The number of bookmarks that were actually assigned a category.
+
+    Raises:
+        ValueError: if the model's reply cannot be parsed as a JSON array.
+    """
+    result = await db.execute(
+        select(Bookmark).where(Bookmark.category.is_(None)).limit(BATCH_SIZE)
+    )
+    bookmarks = result.scalars().all()
+    if not bookmarks:
+        return 0
+
+    items = [
+        {"id": str(b.id), "title": b.title, "link": b.link} for b in bookmarks
+    ]
+
+    # Use the async client: the synchronous one would block the event loop,
+    # and with it every other request, for the whole API round trip.
+    async with anthropic.AsyncAnthropic(
+        api_key=settings.anthropic_api_key
+    ) as client:
+        response = await client.messages.create(
+            model=settings.categorize_model,
+            max_tokens=MAX_OUTPUT_TOKENS,
+            system=SYSTEM_PROMPT,
+            messages=[{"role": "user", "content": json.dumps(items)}],
+        )
+
+    # Join the text blocks instead of indexing content[0], which raises on an
+    # empty reply.
+    reply = "".join(
+        block.text for block in response.content if block.type == "text"
+    )
+    categories = _parse_categories(reply)
+
+    updated = 0
+    for bookmark in bookmarks:
+        category = categories.get(bookmark.id)
+        if category:
+            bookmark.category = category
+            updated += 1
+
+    await db.commit()
+    return updated
diff --git a/backend/app/routers/categorize.py b/backend/app/routers/categorize.py
new file mode 100644
index 0000000..a39afba
--- /dev/null
+++ b/backend/app/routers/categorize.py
@@ -0,0 +1,13 @@
+from fastapi import APIRouter, Depends
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.categorizer import categorize_pending
+from app.database import get_db
+
+router = APIRouter(tags=["categorize"])
+
+
+@router.post("/api/categorize")
+async def run_categorize(db: AsyncSession = Depends(get_db)):
+    # Run one categorization pass and report the count it returns.
+    return {"categorized": await categorize_pending(db)}