Semantic Search with Policy Guardrails
This guide shows how to add Control Zero policy enforcement to a semantic search system, so that you control which agents can search which data collections and which embedding models they are allowed to use.
What You Will Build
A semantic search service that:
- Generates embeddings for queries
- Searches a vector database
- Enforces access control on data collections
- Restricts embedding model usage
Implementation
Setup
pip install controlzero openai chromadb
import logging

import chromadb
import controlzero
import openai
# Control Zero client: every policy decision in this guide goes through `cz`.
# It is initialized here, before any enforce/check call is made.
# NOTE(review): presumably initialize() loads credentials/policies -- confirm
# against the Control Zero SDK documentation.
cz = controlzero.ControlZero()
cz.initialize()
# OpenAI client used to generate the query embeddings.
openai_client = openai.OpenAI()
# Chroma client used to look up and query the vector collections.
chroma = chromadb.Client()
Search with Policy Enforcement
def search(
    query: str,
    collection_name: str,
    agent_id: str,
    n_results: int = 10,
    *,
    embedding_model: str = "text-embedding-3-small",
) -> list[dict]:
    """Search a collection with policy enforcement.

    Two policy checks run before any external call is made: one for read
    access to the collection and one for use of the embedding model. The
    query is then embedded and used to query the Chroma collection.

    Args:
        query: Free-text query to embed and search for.
        collection_name: Name of the Chroma collection to search. It is also
            used to build the policy resource ``vectorstore/<name>``.
        agent_id: Identifier of the calling agent, passed to Control Zero as
            policy context.
        n_results: Maximum number of matches requested from the collection.
        embedding_model: Embedding model used for the query. The same value
            builds the policy resource ``model/<name>`` and is sent to the
            embeddings API, so the model that is authorized is always the
            model that is used.

    Returns:
        One dict per match with the keys ``"document"``, ``"metadata"`` and
        ``"distance"``, in the order returned by the collection.

    Raises:
        Whatever ``cz.enforce`` raises when a policy denies the action.
        NOTE(review): the exact exception type is defined by the Control
        Zero SDK -- confirm before catching it in callers.
    """
    # Enforce: can this agent access this collection?
    cz.enforce(
        action="data.read",
        resource=f"vectorstore/{collection_name}",
        context={"agent_id": agent_id},
    )

    # Enforce: can this agent use this embedding model? The resource is
    # derived from `embedding_model` so it cannot drift from the model that
    # is actually called below.
    cz.enforce(
        action="embedding.generate",
        resource=f"model/{embedding_model}",
        context={"agent_id": agent_id},
    )

    # Generate the query embedding.
    response = openai_client.embeddings.create(
        model=embedding_model,
        input=query,
    )
    query_embedding = response.data[0].embedding

    # Search the collection.
    collection = chroma.get_collection(collection_name)
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=n_results,
    )

    # A single query embedding was submitted, so index 0 of each result
    # field holds the matches for that query.
    return [
        {"document": doc, "metadata": meta, "distance": dist}
        for doc, meta, dist in zip(
            results["documents"][0],
            results["metadatas"][0],
            results["distances"][0],
        )
    ]
Multi-Collection Search
def search_across_collections(
    query: str,
    collections: list[str],
    agent_id: str,
) -> dict[str, list[dict]]:
    """Search multiple collections, skipping those the agent cannot access.

    Each collection is pre-checked with ``cz.check``. Collections the agent
    may read are searched with ``search``; denied collections are logged
    and mapped to an empty list, so one denial does not abort the others.

    Args:
        query: Free-text query forwarded to ``search`` for each collection.
        collections: Names of the collections to search.
        agent_id: Identifier of the calling agent, passed to Control Zero as
            policy context.

    Returns:
        A dict keyed by collection name. Allowed collections map to the
        list returned by ``search``; denied collections map to ``[]``.
        A denied collection therefore looks the same as one with no hits;
        the log record is the only signal that access was refused.

    Raises:
        Whatever ``search`` raises. It performs its own policy enforcement,
        including the embedding-model check, so a denial there propagates.
    """
    logger = logging.getLogger(__name__)
    results: dict[str, list[dict]] = {}

    for collection_name in collections:
        decision = cz.check(
            action="data.read",
            resource=f"vectorstore/{collection_name}",
            context={"agent_id": agent_id},
        )

        if not decision.allowed:
            # Record the denial so it is not silently indistinguishable
            # from an empty result, then continue with other collections.
            logger.info(
                "Access denied for agent %s on collection %s; skipping",
                agent_id,
                collection_name,
            )
            results[collection_name] = []
            continue

        results[collection_name] = search(query, collection_name, agent_id)

    return results
Example Policy
{
"name": "semantic-search-policy",
"rules": [
{
"effect": "allow",
"action": "data.read",
"resource": "vectorstore/public-docs"
},
{
"effect": "allow",
"action": "data.read",
"resource": "vectorstore/product-catalog"
},
{
"effect": "deny",
"action": "data.read",
"resource": "vectorstore/financial-reports"
},
{
"effect": "allow",
"action": "embedding.generate",
"resource": "model/text-embedding-3-small"
}
]
}