Pydantic Essentials
Pydantic is a powerful Python library for data validation and settings management using Python type hints. It is the foundation of FastAPI and is used by millions of developers worldwide.
What is Pydantic?
Pydantic provides runtime type validation and parsing using Python's type annotations. It automatically validates data, converts types, and provides clear error messages.
Key Features:
Why Use Pydantic?
Installation
# Basic installation
pip install pydantic
# With email validation support
pip install "pydantic[email]"
# Latest version (v2)
pip install -U pydantic
Basic Usage
Your First Model
from pydantic import BaseModel
class User(BaseModel):
id: int
name: str
email: str
is_active: bool = True # Default value
# Create instance with validation
user = User(id=1, name="John Doe", email="john@example.com")
print(user.id) # 1
print(user.name) # John Doe
print(user.is_active) # True
# Access as dictionary
print(user.model_dump())
# {'id': 1, 'name': 'John Doe', 'email': 'john@example.com', 'is_active': True}
# JSON serialization
print(user.model_dump_json())
# {"id":1,"name":"John Doe","email":"john@example.com","is_active":true}
Automatic Type Conversion
from pydantic import BaseModel
class Product(BaseModel):
id: int
name: str
price: float
in_stock: bool
# Pydantic converts types automatically
product = Product(
id="123", # str → int
name="Laptop",
price="999.99", # str → float
in_stock="yes" # str → bool
)
print(product.id) # 123 (int)
print(product.price) # 999.99 (float)
print(product.in_stock) # True (bool)
Validation Errors
from pydantic import BaseModel, ValidationError
class User(BaseModel):
id: int
name: str
age: int
try:
user = User(id="not_a_number", name="John", age=25)
except ValidationError as e:
print(e)
# Detailed error messages with location and type
Field Types
Basic Types
from pydantic import BaseModel
from typing import Optional
class Example(BaseModel):
# Strings
name: str
description: Optional[str] = None
# Numbers
integer: int
floating: float
# Boolean
is_active: bool
# Bytes
binary_data: bytes
# None/Optional
optional_field: Optional[str] = None
example = Example(
name="Test",
integer=42,
floating=3.14,
is_active=True,
binary_data=b"hello"
)
Collections
from pydantic import BaseModel
from typing import List, Dict, Set, Tuple
class Collections(BaseModel):
# Lists
tags: List[str]
numbers: List[int]
# Dictionaries
metadata: Dict[str, str]
counts: Dict[str, int]
# Sets
unique_ids: Set[int]
# Tuples
coordinates: Tuple[float, float]
rgb: Tuple[int, int, int]
data = Collections(
tags=["python", "pydantic", "validation"],
numbers=[1, 2, 3, 4, 5],
metadata={"author": "John", "version": "1.0"},
counts={"users": 100, "posts": 500},
unique_ids={1, 2, 3, 4, 5},
coordinates=(10.5, 20.3),
rgb=(255, 128, 0)
)
Dates and Times
from pydantic import BaseModel
from datetime import datetime, date, time, timedelta
class Schedule(BaseModel):
created_at: datetime
event_date: date
start_time: time
duration: timedelta
schedule = Schedule(
created_at="2024-01-15T10:30:00",
event_date="2024-02-20",
start_time="14:30:00",
duration="PT2H30M" # 2 hours 30 minutes
)
print(schedule.created_at) # datetime object
print(schedule.event_date) # date object
print(schedule.start_time) # time object
print(schedule.duration) # timedelta object
URLs and File Paths
from pydantic import BaseModel, HttpUrl, FilePath, DirectoryPath
from pathlib import Path
class Resources(BaseModel):
website: HttpUrl
api_endpoint: HttpUrl
config_file: FilePath
data_dir: DirectoryPath
log_path: Path
resources = Resources(
website="https://example.com",
api_endpoint="https://api.example.com/v1/users",
config_file="/etc/app/config.ini", # Must exist
data_dir="/var/data", # Must exist
log_path="/var/log/app.log" # Can be non-existent
)
Email and Other Special Types
from pydantic import BaseModel, EmailStr, UUID4, SecretStr, Json
import uuid
class UserProfile(BaseModel):
user_id: UUID4
email: EmailStr
password: SecretStr
config: Json
profile = UserProfile(
user_id="550e8400-e29b-41d4-a716-446655440000",
email="user@example.com",
password="secret123",
config='{"theme": "dark", "notifications": true}'
)
print(profile.password.get_secret_value()) # Access secret
print(profile.config) # Already parsed as dict
Nested Models
from pydantic import BaseModel
from typing import List
class Address(BaseModel):
street: str
city: str
country: str
postal_code: str
class User(BaseModel):
id: int
name: str
email: str
address: Address
user = User(
id=1,
name="John Doe",
email="john@example.com",
address={
"street": "123 Main St",
"city": "New York",
"country": "USA",
"postal_code": "10001"
}
)
print(user.address.city) # New York
# Lists of nested models
class Company(BaseModel):
name: str
employees: List[User]
company = Company(
name="Tech Corp",
employees=[
{
"id": 1,
"name": "Alice",
"email": "alice@example.com",
"address": {
"street": "456 Oak Ave",
"city": "Boston",
"country": "USA",
"postal_code": "02101"
}
},
{
"id": 2,
"name": "Bob",
"email": "bob@example.com",
"address": {
"street": "789 Pine Rd",
"city": "Seattle",
"country": "USA",
"postal_code": "98101"
}
}
]
)
print(len(company.employees)) # 2
print(company.employees[0].name) # Alice
Field Validation
Field with Constraints
from pydantic import BaseModel, Field, ValidationError


class User(BaseModel):
    """User record whose fields are constrained declaratively with ``Field``."""

    # Required; must be 3 to 20 characters long.
    username: str = Field(..., min_length=3, max_length=20)
    # ge = greater than or equal, le = less than or equal
    age: int = Field(..., ge=0, le=120)
    # Shape check only (something@something.tld), anchored with ^ and $.
    email: str = Field(..., pattern=r'^[\w\.-]+@[\w\.-]+\.\w+$')
    # Optional; defaults to an empty string, capped at 500 characters.
    bio: str = Field(default="", max_length=500)
    # gt = greater than, lt = less than (both bounds exclusive)
    score: float = Field(..., gt=0, lt=100)


# Valid
user = User(
    username="john_doe",
    age=25,
    email="john@example.com",
    score=85.5
)

# Invalid - raises ValidationError
try:
    user = User(username="ab", age=25, email="john@example.com", score=85.5)
except ValidationError as e:
    print(e)  # username too short
Field Descriptions and Examples
from typing import List

from pydantic import BaseModel, Field


class Product(BaseModel):
    """Product whose fields carry descriptions and examples for the JSON schema."""

    name: str = Field(
        description="Product name",
        examples=["Laptop", "Smartphone"]
    )
    # gt=0 rejects zero and negative prices.
    price: float = Field(
        description="Price in USD",
        examples=[999.99, 1299.00],
        gt=0
    )
    # List must be imported from typing for this annotation to resolve.
    tags: List[str] = Field(
        default=[],
        description="Product tags for categorization"
    )


# Generate JSON schema
print(Product.model_json_schema())
Custom Validators
Field Validators
from pydantic import BaseModel, ValidationError, field_validator


class User(BaseModel):
    """User whose fields are checked or normalised by per-field validators."""

    username: str
    password: str
    email: str

    @field_validator('username')
    @classmethod
    def username_alphanumeric(cls, v):
        """Reject usernames containing anything other than letters and digits."""
        if not v.isalnum():
            raise ValueError('Username must be alphanumeric')
        return v

    @field_validator('password')
    @classmethod
    def password_strength(cls, v):
        """Require at least 8 characters, one uppercase letter and one digit."""
        if len(v) < 8:
            raise ValueError('Password must be at least 8 characters')
        if not any(c.isupper() for c in v):
            raise ValueError('Password must contain uppercase letter')
        if not any(c.isdigit() for c in v):
            raise ValueError('Password must contain digit')
        return v

    @field_validator('email')
    @classmethod
    def email_lowercase(cls, v):
        """Normalise the email to lowercase; the returned value is what gets stored."""
        return v.lower()


# Valid
user = User(
    username="johndoe",
    password="SecurePass123",
    email="JOHN@EXAMPLE.COM"
)
print(user.email)  # john@example.com (lowercased)

# Invalid
try:
    user = User(username="john-doe", password="weak", email="test@test.com")
except ValidationError as e:
    print(e)
Model Validators
from datetime import date

from pydantic import BaseModel, ValidationError, model_validator


class DateRange(BaseModel):
    """Pair of dates validated together once both fields have been parsed."""

    start_date: date
    end_date: date

    @model_validator(mode='after')
    def check_dates(self):
        """Reject ranges whose start is later than their end.

        Runs after field validation, so both attributes are ``date`` objects.
        Equal dates pass, because only ``start_date > end_date`` is rejected.
        """
        if self.start_date > self.end_date:
            raise ValueError('start_date must be before end_date')
        return self


# Valid
range1 = DateRange(start_date="2024-01-01", end_date="2024-12-31")

# Invalid
try:
    range2 = DateRange(start_date="2024-12-31", end_date="2024-01-01")
except ValidationError as e:
    print(e)
Before and After Validators
from pydantic import BaseModel, field_validator
class User(BaseModel):
name: str
email: str
@field_validator('name', mode='before')
@classmethod
def clean_name(cls, v):
# Runs before type conversion
if isinstance(v, str):
return v.strip().title()
return v
@field_validator('email', mode='after')
@classmethod
def validate_email_domain(cls, v):
# Runs after type conversion
if not v.endswith('@company.com'):
raise ValueError('Must use company email')
return v
user = User(name=" john doe ", email="john@company.com")
print(user.name) # John Doe (cleaned and title-cased)
Model Configuration
from pydantic import BaseModel, ConfigDict, ValidationError


class User(BaseModel):
    """User model showing common ``ConfigDict`` options."""

    model_config = ConfigDict(
        str_strip_whitespace=True,  # Strip whitespace from strings
        str_min_length=1,  # Minimum string length
        validate_default=True,  # Validate default values
        frozen=True,  # Make model immutable
        use_enum_values=True,  # Use enum values instead of enum
        populate_by_name=True  # Allow population by field name as well as alias
    )

    name: str
    email: str


user = User(name=" John ", email=" john@example.com ")
print(user.name)  # "John" (whitespace stripped)

# Frozen model - cannot modify; assignment raises ValidationError
try:
    user.name = "Jane"
except ValidationError:
    print("Cannot modify frozen model")
Aliases and Field Names
from pydantic import BaseModel, Field
class User(BaseModel):
user_id: int = Field(alias='userId')
full_name: str = Field(alias='fullName')
email_address: str = Field(alias='emailAddress')
# Create using aliases
user = User(
userId=1,
fullName="John Doe",
emailAddress="john@example.com"
)
# Access using Python names
print(user.user_id) # 1
print(user.full_name) # John Doe
print(user.email_address) # john@example.com
# Export with aliases
print(user.model_dump(by_alias=True))
# {'userId': 1, 'fullName': 'John Doe', 'emailAddress': 'john@example.com'}
Working with JSON
Parsing JSON
from pydantic import BaseModel
class User(BaseModel):
id: int
name: str
email: str
# Parse JSON string
json_data = '{"id": 1, "name": "John", "email": "john@example.com"}'
user = User.model_validate_json(json_data)
# Parse from dict
dict_data = {"id": 2, "name": "Jane", "email": "jane@example.com"}
user = User.model_validate(dict_data)
Serializing to JSON
from pydantic import BaseModel
from datetime import datetime
class Event(BaseModel):
id: int
name: str
created_at: datetime
event = Event(id=1, name="Conference", created_at=datetime.now())
# To dict
event_dict = event.model_dump()
# To JSON string
event_json = event.model_dump_json()
# With formatting
event_json = event.model_dump_json(indent=2)
# Exclude fields
event_dict = event.model_dump(exclude={'created_at'})
# Include only specific fields
event_dict = event.model_dump(include={'id', 'name'})
Practical Examples
Example 1: API Request/Response Models
from datetime import datetime
from typing import List, Optional

from pydantic import BaseModel, ConfigDict, EmailStr, Field


class CreateUserRequest(BaseModel):
    """Payload accepted when creating a user."""

    username: str = Field(..., min_length=3, max_length=50)
    email: EmailStr
    password: str = Field(..., min_length=8)
    full_name: str


class UserResponse(BaseModel):
    """User representation returned to clients (no password field)."""

    id: int
    username: str
    email: str
    full_name: str
    created_at: datetime
    is_active: bool = True

    # from_attributes lets model_validate() read values from object attributes
    # (e.g. an ORM row) instead of requiring a dict.
    model_config = ConfigDict(from_attributes=True)


class UpdateUserRequest(BaseModel):
    """Partial update payload; every field is optional."""

    full_name: Optional[str] = None
    email: Optional[EmailStr] = None


class PaginatedUserResponse(BaseModel):
    """One page of users plus the paging metadata."""

    users: List[UserResponse]
    total: int
    page: int
    page_size: int

    @property
    def total_pages(self) -> int:
        """Number of pages needed for ``total`` items (ceiling division).

        This is a plain property, so it is not included in ``model_dump()``.
        """
        return (self.total + self.page_size - 1) // self.page_size


# Usage in FastAPI
"""
@app.post("/users", response_model=UserResponse)
async def create_user(user: CreateUserRequest):
    # Create user in database
    db_user = create_db_user(user)
    # model_validate replaces the deprecated v1 from_orm()
    return UserResponse.model_validate(db_user)

@app.get("/users", response_model=PaginatedUserResponse)
async def list_users(page: int = 1, page_size: int = 10):
    users = get_users_from_db(page, page_size)
    total = count_users()
    return PaginatedUserResponse(
        users=users,
        total=total,
        page=page,
        page_size=page_size
    )
"""
Example 2: Configuration Settings
from typing import Optional

from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """Application settings loaded from environment variables or a .env file.

    Fields without a default (``database_url``, ``api_key``) must be supplied
    by the environment, otherwise instantiation fails validation.
    """

    # Application settings
    app_name: str = "My Application"
    debug: bool = False

    # Database settings
    database_url: str
    database_pool_size: int = 5

    # API settings
    api_key: str
    api_timeout: int = 30

    # Redis settings
    redis_host: str = "localhost"
    redis_port: int = 6379

    # Email settings
    smtp_host: Optional[str] = None
    smtp_port: int = 587
    smtp_user: Optional[str] = None
    smtp_password: Optional[str] = None

    # Settings classes use SettingsConfigDict, which adds the env_* options.
    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False
    )


# Load from environment variables or .env file
settings = Settings()
print(settings.database_url)
print(settings.debug)
.env file:
DATABASE_URL=postgresql://user:pass@localhost/dbname
API_KEY=your-secret-key
DEBUG=true
Example 3: Data Validation Pipeline
from datetime import date
from typing import List, Optional

from pydantic import BaseModel, Field, ValidationError, field_validator


class Transaction(BaseModel):
    """Single financial transaction with validated amount, currency and category."""

    transaction_id: str
    amount: float = Field(..., gt=0)  # Must be strictly positive
    # Exactly three uppercase letters, e.g. "USD"
    currency: str = Field(..., pattern=r'^[A-Z]{3}$')
    date: date
    description: str
    category: str

    @field_validator('category')
    @classmethod
    def validate_category(cls, v):
        """Accept known categories case-insensitively and store them lowercased."""
        valid_categories = ['food', 'transport', 'entertainment', 'utilities', 'other']
        if v.lower() not in valid_categories:
            raise ValueError(f'Category must be one of {valid_categories}')
        return v.lower()


class BulkTransactionImport(BaseModel):
    """Batch of transactions validated together."""

    transactions: List[Transaction]

    @property
    def total_amount(self) -> float:
        """Sum of the amounts of all transactions in the batch."""
        return sum(t.amount for t in self.transactions)

    def get_by_category(self, category: str) -> List[Transaction]:
        """Return the transactions whose stored (lowercased) category equals ``category``."""
        return [t for t in self.transactions if t.category == category]


# Import and validate transactions
raw_data = {
    "transactions": [
        {
            "transaction_id": "TX001",
            "amount": 45.50,
            "currency": "USD",
            "date": "2024-01-15",
            "description": "Grocery shopping",
            "category": "food"
        },
        {
            "transaction_id": "TX002",
            "amount": 12.00,
            "currency": "USD",
            "date": "2024-01-16",
            "description": "Bus ticket",
            "category": "transport"
        }
    ]
}

try:
    bulk_import = BulkTransactionImport(**raw_data)
    print(f"Total transactions: {len(bulk_import.transactions)}")
    print(f"Total amount: ${bulk_import.total_amount:.2f}")
    print(f"Food transactions: {len(bulk_import.get_by_category('food'))}")
except ValidationError as e:
    print(f"Validation errors: {e}")
Example 4: Database Model with ORM
from datetime import datetime
from typing import Optional

from pydantic import BaseModel, ConfigDict

# SQLAlchemy model (example)
"""
class UserDB:
    id: int
    username: str
    email: str
    created_at: datetime
"""


class UserSchema(BaseModel):
    """Read schema that can be built directly from an ORM object."""

    id: int
    username: str
    email: str
    created_at: datetime

    # Required so model_validate() can read attributes from an ORM instance.
    model_config = ConfigDict(from_attributes=True)


class UserCreate(BaseModel):
    """Fields required to create a user."""

    username: str
    email: str
    password: str


class UserUpdate(BaseModel):
    """Partial update; fields left unset are skipped via exclude_unset."""

    username: Optional[str] = None
    email: Optional[str] = None
    password: Optional[str] = None


# Usage
"""
# Create
def create_user(user: UserCreate):
    db_user = UserDB(**user.model_dump())
    db.add(db_user)
    db.commit()
    # model_validate replaces the deprecated v1 from_orm()
    return UserSchema.model_validate(db_user)

# Read
def get_user(user_id: int):
    db_user = db.query(UserDB).filter(UserDB.id == user_id).first()
    if db_user:
        return UserSchema.model_validate(db_user)
    return None

# Update
def update_user(user_id: int, user: UserUpdate):
    db_user = db.query(UserDB).filter(UserDB.id == user_id).first()
    if db_user is None:
        # Same not-found handling as get_user; avoids setattr on None
        return None
    for field, value in user.model_dump(exclude_unset=True).items():
        setattr(db_user, field, value)
    db.commit()
    return UserSchema.model_validate(db_user)
"""
Advanced Features
Discriminated Unions
from pydantic import BaseModel, Field
from typing import Literal, Union
class Cat(BaseModel):
pet_type: Literal['cat']
meows: int
class Dog(BaseModel):
pet_type: Literal['dog']
barks: float
class Pet(BaseModel):
animal: Union[Cat, Dog] = Field(discriminator='pet_type')
# Automatically determines which model to use
cat = Pet(animal={'pet_type': 'cat', 'meows': 5})
dog = Pet(animal={'pet_type': 'dog', 'barks': 3.5})
print(type(cat.animal)) # Cat
print(type(dog.animal)) # Dog
Generic Models
from pydantic import BaseModel
from typing import Generic, TypeVar, List
T = TypeVar('T')
class Response(BaseModel, Generic[T]):
data: T
message: str
success: bool
class User(BaseModel):
id: int
name: str
# Use with different types
user_response = Response[User](
data=User(id=1, name="John"),
message="User retrieved",
success=True
)
list_response = Response[List[User]](
data=[User(id=1, name="John"), User(id=2, name="Jane")],
message="Users retrieved",
success=True
)
Computed Fields
from pydantic import BaseModel, computed_field
class Rectangle(BaseModel):
width: float
height: float
@computed_field
@property
def area(self) -> float:
return self.width * self.height
@computed_field
@property
def perimeter(self) -> float:
return 2 * (self.width + self.height)
rect = Rectangle(width=10, height=5)
print(rect.area) # 50.0
print(rect.perimeter) # 30.0
# Included in serialization
print(rect.model_dump())
# {'width': 10.0, 'height': 5.0, 'area': 50.0, 'perimeter': 30.0}
Best Practices
1. Use Type Hints Correctly
from typing import Any, List, Optional

from pydantic import BaseModel


class Good(BaseModel):
    """Every field has a clear, explicit type."""

    # Clear, explicit types
    name: str
    age: int
    tags: List[str]
    description: Optional[str] = None


class Bad(BaseModel):
    """Counter-example: a field typed as Any is not checked against any type."""

    name: str
    # Avoid: no type hints or Any
    data: Any  # Too vague
2. Validate at Boundaries
# Good: Validate at API boundaries
@app.post("/users")
async def create_user(user: UserCreate): # Validated here
# user is already validated
db_user = save_to_db(user)
return db_user
# Bad: Manual validation everywhere
@app.post("/users")
async def create_user(data: dict):
if 'username' not in data:
raise ValueError("Missing username")
if 'email' not in data:
raise ValueError("Missing email")
# ... more validation
3. Use Appropriate Models
# Separate models for different operations
class UserCreate(BaseModel):
username: str
email: str
password: str
class UserUpdate(BaseModel):
email: Optional[str] = None
full_name: Optional[str] = None
class UserResponse(BaseModel):
id: int
username: str
email: str
# No password in response!
4. Document Your Models
class User(BaseModel):
"""
User model for authentication and profile management.
"""
username: str = Field(
description="Unique username for login",
examples=["johndoe"]
)
email: EmailStr = Field(
description="User's email address",
examples=["john@example.com"]
)
age: Optional[int] = Field(
None,
description="User's age in years",
ge=0,
le=150
)
Common Patterns
Optional Fields
from typing import Optional
class User(BaseModel):
# Required field
username: str
# Optional with default None
bio: Optional[str] = None
# Optional with default value
is_active: bool = True
# Optional with Field
age: Optional[int] = Field(None, ge=0, le=120)
Exclude Fields from Serialization
class User(BaseModel):
username: str
email: str
password: str = Field(exclude=True) # Never serialized
user = User(username="john", email="john@example.com", password="secret")
print(user.model_dump()) # password not included
Default Factories
from datetime import datetime
from typing import List
class Post(BaseModel):
title: str
content: str
created_at: datetime = Field(default_factory=datetime.now)
tags: List[str] = Field(default_factory=list)
post = Post(title="My Post", content="Content here")
print(post.created_at) # Current timestamp
print(post.tags) # []
Error Handling
from pydantic import BaseModel, ValidationError
class User(BaseModel):
id: int
name: str
email: str
try:
user = User(id="invalid", name="John")
except ValidationError as e:
print(e.errors())
# [
# {
# 'type': 'int_parsing',
# 'loc': ('id',),
# 'msg': 'Input should be a valid integer',
# 'input': 'invalid'
# },
# {
# 'type': 'missing',
# 'loc': ('email',),
# 'msg': 'Field required'
# }
# ]
# Custom error handling
for error in e.errors():
field = error['loc'][0]
message = error['msg']
print(f"Error in {field}: {message}")
Testing with Pydantic
import pytest
from pydantic import ValidationError
def test_user_creation():
user = User(id=1, name="John", email="john@example.com")
assert user.id == 1
assert user.name == "John"
def test_user_validation():
with pytest.raises(ValidationError):
User(id="invalid", name="John", email="john@example.com")
def test_user_serialization():
user = User(id=1, name="John", email="john@example.com")
data = user.model_dump()
assert data == {
'id': 1,
'name': 'John',
'email': 'john@example.com'
}
Key Takeaways
Additional Resources
Official Documentation:
Integration:
Community:
Next Steps
Pydantic makes data validation in Python a breeze. Start small, validate early, and let Pydantic catch errors before they become problems!