Skip to main content

Semantic Search with Policy Guardrails

This guide shows how to add Control Zero policy enforcement to a semantic search system, controlling which agents can search which data collections and which embedding models they are allowed to use.

What You Will Build

A semantic search service that:

  • Generates embeddings for queries
  • Searches a vector database
  • Enforces access control on data collections
  • Restricts embedding model usage

Implementation

Setup

pip install controlzero openai chromadb
import controlzero
import openai
import chromadb

# Control Zero client; every policy check in this guide goes through `cz`.
# NOTE(review): how ControlZero() finds its credentials/policies is not shown
# here -- confirm against the SDK setup docs.
cz = controlzero.ControlZero()
cz.initialize()

# OpenAI client used to generate the query embeddings.
openai_client = openai.OpenAI()
# Chroma client used to look up the collections that are searched.
chroma = chromadb.Client()

Search with Policy Enforcement

def search(
    query: str,
    collection_name: str,
    agent_id: str,
    n_results: int = 10,
    embedding_model: str = "text-embedding-3-small",
) -> list[dict]:
    """Search a collection with policy enforcement.

    Both policy checks run before any embedding is generated or any data is
    read, so a denied request never reaches OpenAI or the vector store.

    Args:
        query: Text to embed and search for.
        collection_name: Name of the Chroma collection to query. Also used to
            build the policy resource ``vectorstore/<collection_name>``.
        agent_id: Identifier passed to Control Zero as ``context["agent_id"]``.
        n_results: Number of matches requested from the collection.
        embedding_model: Name of the OpenAI embedding model. The same value
            builds the policy resource ``model/<embedding_model>`` and is sent
            to the embeddings API, so the model that is authorized is always
            the model that is actually called.
            NOTE(review): the collection presumably has to be indexed with
            vectors from the same model -- confirm before overriding the
            default.

    Returns:
        One dict per match with the keys ``"document"``, ``"metadata"`` and
        ``"distance"``.

    Raises:
        NOTE(review): ``cz.enforce`` presumably raises when the policy denies
        the action; the exception type is not visible here -- confirm against
        the Control Zero SDK.
    """
    # Enforce: can this agent access this collection?
    cz.enforce(
        action="data.read",
        resource=f"vectorstore/{collection_name}",
        context={"agent_id": agent_id},
    )

    # Enforce: can this agent use this embedding model?
    cz.enforce(
        action="embedding.generate",
        resource=f"model/{embedding_model}",
        context={"agent_id": agent_id},
    )

    # Generate the query embedding with the model that was just authorized.
    response = openai_client.embeddings.create(
        model=embedding_model,
        input=query,
    )
    query_embedding = response.data[0].embedding

    # Search the collection.
    collection = chroma.get_collection(collection_name)
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=n_results,
    )

    # Exactly one query embedding was sent, so index 0 holds its matches.
    return [
        {"document": doc, "metadata": meta, "distance": dist}
        for doc, meta, dist in zip(
            results["documents"][0],
            results["metadatas"][0],
            results["distances"][0],
        )
    ]


def search_across_collections(
    query: str,
    collections: list[str],
    agent_id: str,
) -> dict[str, list[dict]]:
    """Search multiple collections, skipping those the agent cannot access.

    Each collection is pre-checked with ``cz.check``. A denied collection is
    logged and mapped to an empty list instead of aborting the whole request.

    Args:
        query: Text to search for in every accessible collection.
        collections: Names of the collections to try.
        agent_id: Identifier passed to Control Zero as ``context["agent_id"]``.

    Returns:
        A dict with one entry per requested collection name. Allowed
        collections map to the list returned by ``search``; denied
        collections map to ``[]``.

    Note:
        ``search`` runs its own ``cz.enforce`` calls (including
        ``embedding.generate``), which this function does not pre-check.
        NOTE(review): a denial there presumably raises and propagates to the
        caller -- confirm against the Control Zero SDK.
    """
    # Function-scope import keeps this snippet self-contained.
    import logging

    logger = logging.getLogger(__name__)

    results = {}
    for collection_name in collections:
        decision = cz.check(
            action="data.read",
            resource=f"vectorstore/{collection_name}",
            context={"agent_id": agent_id},
        )

        if not decision.allowed:
            # Record the denial so an empty entry can be told apart from a
            # search that simply found nothing, then continue with the rest.
            logger.warning(
                "Access denied: agent %r may not read collection %r",
                agent_id,
                collection_name,
            )
            results[collection_name] = []
            continue

        results[collection_name] = search(query, collection_name, agent_id)

    return results

Example Policy

{
"name": "semantic-search-policy",
"rules": [
{
"effect": "allow",
"action": "data.read",
"resource": "vectorstore/public-docs"
},
{
"effect": "allow",
"action": "data.read",
"resource": "vectorstore/product-catalog"
},
{
"effect": "deny",
"action": "data.read",
"resource": "vectorstore/financial-reports"
},
{
"effect": "allow",
"action": "embedding.generate",
"resource": "model/text-embedding-3-small"
}
]
}

Next Steps

  • See the RAG Guide for adding generation on top of retrieval.
  • Learn about Policies for fine-grained access control.