my-favs/backend/app/categorizer.py
RamonCalvo a1f33e2046 feat: add LLM-based bookmark categorization and README
Cron-triggered endpoint that uses Claude to auto-categorize
uncategorized bookmarks. Includes full project documentation.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-28 18:37:29 -06:00

47 lines
1.4 KiB
Python

import json
import uuid as _uuid
import anthropic
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.config import settings
from app.models import Bookmark
# System prompt passed as `system=` to the model by categorize_pending().
# It asks for a bare JSON array of {"id", "category"} objects, which that
# function then feeds to json.loads.
# NOTE(review): the prompt says "title + url", but the payload built in
# categorize_pending() uses the keys "id", "title" and "link" -- confirm
# whether the wording should say "link".
SYSTEM_PROMPT = """You categorize bookmarks. Given a list of bookmarks (title + url),
assign each one a short category label (1-2 words, lowercase, e.g. "python", "devops", "design", "news", "ai/ml").
Respond with a JSON array of objects: [{"id": "...", "category": "..."}]
Only return the JSON, nothing else."""
# Maximum number of uncategorized bookmarks sent to the model per call.
_BATCH_SIZE = 50

# Output budget for the model's reply. Each returned object carries a
# 36-character UUID plus JSON punctuation and a label, so a full batch needs
# well over 1024 tokens; a smaller budget truncates the array mid-way and
# yields invalid JSON.
_MAX_OUTPUT_TOKENS = 4096


async def categorize_pending(db: AsyncSession) -> int:
    """Ask the LLM to categorize one batch of uncategorized bookmarks.

    Loads up to ``_BATCH_SIZE`` bookmarks whose ``category`` is NULL, sends
    their id/title/link to the configured model, applies the categories it
    returns, and commits the session.

    Args:
        db: Async SQLAlchemy session used to read and update bookmarks.

    Returns:
        The number of bookmarks that actually received a category. Entries
        in the reply that are malformed, reference an unknown id, or carry
        an empty category are skipped and not counted.

    Raises:
        ValueError: If the reply was cut off by the token limit or is not a
            JSON array (``json.JSONDecodeError`` is a ``ValueError``).
    """
    result = await db.execute(
        select(Bookmark).where(Bookmark.category.is_(None)).limit(_BATCH_SIZE)
    )
    bookmarks = result.scalars().all()
    if not bookmarks:
        return 0

    payload = json.dumps(
        [{"id": str(b.id), "title": b.title, "link": b.link} for b in bookmarks]
    )

    # Use the async client: the synchronous one would block the event loop
    # for the entire duration of the HTTP round-trip to the API.
    async with anthropic.AsyncAnthropic(
        api_key=settings.anthropic_api_key
    ) as client:
        response = await client.messages.create(
            model=settings.categorize_model,
            max_tokens=_MAX_OUTPUT_TOKENS,
            system=SYSTEM_PROMPT,
            messages=[{"role": "user", "content": payload}],
        )

    if response.stop_reason == "max_tokens":
        raise ValueError(
            "categorizer response was truncated at the max_tokens limit"
        )

    # Concatenate text blocks instead of assuming content[0] is text.
    raw = "".join(
        block.text
        for block in response.content
        if getattr(block, "type", None) == "text"
    )
    # Tolerate Markdown code fences or stray prose around the JSON array.
    start, end = raw.find("["), raw.rfind("]")
    if start != -1 and end > start:
        raw = raw[start:end + 1]

    categories = json.loads(raw)
    if not isinstance(categories, list):
        raise ValueError("categorizer response is not a JSON array")

    pending = {b.id: b for b in bookmarks}
    updated = 0
    for entry in categories:
        if not isinstance(entry, dict):
            continue
        category = entry.get("category")
        if not isinstance(category, str) or not category.strip():
            continue
        try:
            bookmark_id = _uuid.UUID(str(entry.get("id")))
        except ValueError:
            # The model returned a missing or garbled id; skip this entry
            # rather than aborting the whole batch.
            continue
        # pop() so a duplicated id is applied and counted only once.
        bookmark = pending.pop(bookmark_id, None)
        if bookmark is None:
            continue
        bookmark.category = category.strip()
        updated += 1

    await db.commit()
    return updated