Database Migrations in Python
Database migrations are version control for your database schema. They allow you to track, manage, and deploy database changes in a systematic and repeatable way.
What are Database Migrations?
Database migrations are scripts that modify your database schema over time - adding tables, columns, indexes, or changing existing structures. Think of them as Git commits for your database.
Key Concepts:
Why Use Migrations?
Alembic
Alembic is the most popular database migration tool for Python, designed specifically to work with SQLAlchemy.
Features:
Installation
# Install Alembic
pip install alembic
# With SQLAlchemy
pip install alembic sqlalchemy
# With specific database driver
pip install alembic sqlalchemy psycopg2-binary # PostgreSQL
pip install alembic sqlalchemy pymysql # MySQL
pip install alembic sqlalchemy # SQLite (built-in)
Getting Started
Initialize Alembic
# Initialize Alembic in your project
alembic init alembic
# Or with async support
alembic init -t async alembic
This creates:
project/
├── alembic/
│ ├── versions/ # Migration scripts
│ ├── env.py # Migration environment
│ ├── script.py.mako # Template for new migrations
│ └── README
├── alembic.ini # Alembic configuration
└── models.py # Your SQLAlchemy models
Configure Database Connection
alembic.ini:
# alembic.ini
[alembic]
script_location = alembic
sqlalchemy.url = postgresql://user:password@localhost/dbname
# Or use environment variable
# sqlalchemy.url =
[loggers]
keys = root,sqlalchemy,alembic
[handlers]
keys = console
[formatters]
keys = generic
[logger_root]
level = WARN
handlers = console
[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine
[logger_alembic]
level = INFO
handlers =
qualname = alembic
[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S
Using Environment Variables:
# alembic/env.py
from sqlalchemy import engine_from_config, pool
from alembic import context
import os
# Get database URL from environment
config = context.config
config.set_main_option(
'sqlalchemy.url',
os.getenv('DATABASE_URL', 'sqlite:///./app.db')
)
Link SQLAlchemy Models
alembic/env.py:
from logging.config import fileConfig
from sqlalchemy import engine_from_config, pool
from alembic import context
# Import your models
from myapp.models import Base # Your declarative base
from myapp import models # Import all models
# this is the Alembic Config object
config = context.config
# Interpret the config file for Python logging
if config.config_file_name is not None:
fileConfig(config.config_file_name)
# Set target metadata for autogenerate
target_metadata = Base.metadata
def run_migrations_offline() -> None:
"""Run migrations in 'offline' mode."""
url = config.get_main_option("sqlalchemy.url")
context.configure(
url=url,
target_metadata=target_metadata,
literal_binds=True,
dialect_opts={"paramstyle": "named"},
)
with context.begin_transaction():
context.run_migrations()
def run_migrations_online() -> None:
"""Run migrations in 'online' mode."""
connectable = engine_from_config(
config.get_section(config.config_ini_section),
prefix="sqlalchemy.",
poolclass=pool.NullPool,
)
with connectable.connect() as connection:
context.configure(
connection=connection,
target_metadata=target_metadata
)
with context.begin_transaction():
context.run_migrations()
if context.is_offline_mode():
run_migrations_offline()
else:
run_migrations_online()
Creating Migrations
Auto-Generate Migrations
Alembic can automatically detect changes in your SQLAlchemy models.
# Create migration from model changes
alembic revision --autogenerate -m "create users table"
# This generates a file like:
# alembic/versions/abc123_create_users_table.py
Example Models:
# models.py
from sqlalchemy import Column, Integer, String, DateTime, Boolean
from sqlalchemy.orm import declarative_base  # the sqlalchemy.ext.declarative path is deprecated since SQLAlchemy 1.4
from datetime import datetime
Base = declarative_base()
class User(Base):
__tablename__ = 'users'
id = Column(Integer, primary_key=True)
username = Column(String(50), unique=True, nullable=False)
email = Column(String(100), unique=True, nullable=False)
is_active = Column(Boolean, default=True)
created_at = Column(DateTime, default=datetime.utcnow)
Generated Migration:
"""create users table
Revision ID: abc123def456
Revises:
Create Date: 2024-01-15 10:30:00.000000
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic
revision = 'abc123def456'
down_revision = None
branch_labels = None
depends_on = None
def upgrade() -> None:
# ### commands auto generated by Alembic ###
op.create_table(
'users',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('username', sa.String(length=50), nullable=False),
sa.Column('email', sa.String(length=100), nullable=False),
sa.Column('is_active', sa.Boolean(), nullable=True),
sa.Column('created_at', sa.DateTime(), nullable=True),
sa.PrimaryKeyConstraint('id'),
sa.UniqueConstraint('email'),
sa.UniqueConstraint('username')
)
# ### end Alembic commands ###
def downgrade() -> None:
# ### commands auto generated by Alembic ###
op.drop_table('users')
# ### end Alembic commands ###
Manual Migrations
Sometimes you need to write migrations manually.
# Create empty migration
alembic revision -m "add user role column"
Manual Migration Example:
"""add user role column
Revision ID: xyz789
Revises: abc123def456
Create Date: 2024-01-16 14:20:00.000000
"""
from alembic import op
import sqlalchemy as sa
revision = 'xyz789'
down_revision = 'abc123def456'
branch_labels = None
depends_on = None
def upgrade() -> None:
# Add column
op.add_column('users', sa.Column('role', sa.String(20), nullable=True))
# Set default value for existing rows
op.execute("UPDATE users SET role = 'user' WHERE role IS NULL")
# Make column non-nullable
op.alter_column('users', 'role', nullable=False)
# Add index
op.create_index('ix_users_role', 'users', ['role'])
def downgrade() -> None:
op.drop_index('ix_users_role', 'users')
op.drop_column('users', 'role')
Running Migrations
Apply Migrations
# Apply all pending migrations
alembic upgrade head
# Apply specific number of migrations
alembic upgrade +2
# Apply to specific revision
alembic upgrade abc123
# Show SQL without executing
alembic upgrade head --sql
Rollback Migrations
# Rollback one migration
alembic downgrade -1
# Rollback to specific revision
alembic downgrade abc123
# Rollback all migrations
alembic downgrade base
# Show SQL for rollback
alembic downgrade -1 --sql
Check Migration Status
# Show current version
alembic current
# Show migration history
alembic history
# Show pending migrations
alembic history --verbose
# Show migration details
alembic show abc123
Common Migration Operations
Table Operations
def upgrade():
# Create table
op.create_table(
'posts',
sa.Column('id', sa.Integer(), primary_key=True),
sa.Column('title', sa.String(200), nullable=False),
sa.Column('content', sa.Text()),
sa.Column('user_id', sa.Integer(), sa.ForeignKey('users.id')),
sa.Column('created_at', sa.DateTime(), server_default=sa.func.now())
)
# Drop table
op.drop_table('old_table')
# Rename table
op.rename_table('old_name', 'new_name')
Column Operations
def upgrade():
# Add column
op.add_column('users', sa.Column('phone', sa.String(20)))
# Add column with default
op.add_column(
'users',
sa.Column('status', sa.String(20), server_default='active')
)
# Drop column
op.drop_column('users', 'old_column')
# Rename column
op.alter_column('users', 'old_name', new_column_name='new_name')
# Change column type
op.alter_column('users', 'age', type_=sa.BigInteger())
# Change nullable
op.alter_column('users', 'email', nullable=False)
# Add default value
op.alter_column('users', 'is_active', server_default='true')
Index Operations
def upgrade():
# Create index
op.create_index('ix_users_email', 'users', ['email'])
# Create unique index
op.create_index('ix_users_username', 'users', ['username'], unique=True)
# Create composite index
op.create_index('ix_users_name', 'users', ['first_name', 'last_name'])
# Drop index
op.drop_index('ix_users_email', 'users')
# PostgreSQL: Create partial index
op.create_index(
'ix_active_users',
'users',
['email'],
postgresql_where=sa.text('is_active = true')
)
Constraint Operations
def upgrade():
# Add primary key
op.create_primary_key('pk_users', 'users', ['id'])
# Add foreign key
op.create_foreign_key(
'fk_posts_user_id',
'posts', 'users',
['user_id'], ['id'],
ondelete='CASCADE'
)
# Drop foreign key
op.drop_constraint('fk_posts_user_id', 'posts', type_='foreignkey')
# Add unique constraint
op.create_unique_constraint('uq_users_email', 'users', ['email'])
# Add check constraint
op.create_check_constraint(
'ck_users_age',
'users',
'age >= 18'
)
# Drop constraint
op.drop_constraint('uq_users_email', 'users', type_='unique')
Data Migrations
from alembic import op
import sqlalchemy as sa
from sqlalchemy.sql import table, column
def upgrade():
# Define table structure for data manipulation
users = table(
'users',
column('id', sa.Integer),
column('status', sa.String),
)
# Update data
op.execute(
users.update()
.where(users.c.status == 'pending')
.values(status='active')
)
# Insert data
op.bulk_insert(
users,
[
{'status': 'admin'},
{'status': 'user'},
]
)
# Raw SQL
op.execute("UPDATE users SET status = 'verified' WHERE email_verified = true")
Advanced Patterns
Multiple Database Support
# alembic/env.py
from sqlalchemy import engine_from_config
def run_migrations_online() -> None:
# Get database from environment
db_type = os.getenv('DB_TYPE', 'postgresql')
if db_type == 'postgresql':
url = os.getenv('POSTGRESQL_URL')
elif db_type == 'mysql':
url = os.getenv('MYSQL_URL')
else:
url = os.getenv('SQLITE_URL')
config.set_main_option('sqlalchemy.url', url)
connectable = engine_from_config(
config.get_section(config.config_ini_section),
prefix="sqlalchemy.",
poolclass=pool.NullPool,
)
with connectable.connect() as connection:
context.configure(
connection=connection,
target_metadata=target_metadata
)
with context.begin_transaction():
context.run_migrations()
Database-Specific Operations
def upgrade():
# Use op.get_context() to check the dialect — `op` is already imported in
# every migration script, whereas `context` would need its own import
# PostgreSQL specific
if op.get_context().dialect.name == 'postgresql':
op.execute('CREATE EXTENSION IF NOT EXISTS "uuid-ossp"')
op.create_index(
'ix_users_email_gin',
'users',
['email'],
postgresql_using='gin'
)
# MySQL specific
elif op.get_context().dialect.name == 'mysql':
op.execute('SET FOREIGN_KEY_CHECKS=0')
# ... operations
op.execute('SET FOREIGN_KEY_CHECKS=1')
Conditional Migrations
from alembic import op
from sqlalchemy import inspect
def upgrade():
# Check if column exists
conn = op.get_bind()
inspector = inspect(conn)
columns = [col['name'] for col in inspector.get_columns('users')]
if 'phone' not in columns:
op.add_column('users', sa.Column('phone', sa.String(20)))
# Check if table exists
tables = inspector.get_table_names()
if 'old_table' in tables:
op.drop_table('old_table')
Branch Management
# Create branch for feature development
alembic revision -m "feature branch" --head=head --branch-label=feature
# Merge branches
alembic merge -m "merge feature" heads
# Show branches
alembic branches
Practical Examples
Example 1: Blog Application Migrations
Initial Schema:
"""create blog schema
Revision ID: 001
"""
from alembic import op
import sqlalchemy as sa
def upgrade():
# Users table
op.create_table(
'users',
sa.Column('id', sa.Integer(), primary_key=True),
sa.Column('username', sa.String(50), unique=True, nullable=False),
sa.Column('email', sa.String(100), unique=True, nullable=False),
sa.Column('password_hash', sa.String(255), nullable=False),
sa.Column('created_at', sa.DateTime(), server_default=sa.func.now())
)
# Posts table
op.create_table(
'posts',
sa.Column('id', sa.Integer(), primary_key=True),
sa.Column('title', sa.String(200), nullable=False),
sa.Column('slug', sa.String(200), unique=True, nullable=False),
sa.Column('content', sa.Text(), nullable=False),
sa.Column('published', sa.Boolean(), default=False),
sa.Column('author_id', sa.Integer(), sa.ForeignKey('users.id')),
sa.Column('created_at', sa.DateTime(), server_default=sa.func.now()),
sa.Column('updated_at', sa.DateTime(), onupdate=sa.func.now())  # Note: onupdate is a client-side SQLAlchemy default — it creates no database-level trigger, so rows updated outside SQLAlchemy are not touched
)
# Comments table
op.create_table(
'comments',
sa.Column('id', sa.Integer(), primary_key=True),
sa.Column('content', sa.Text(), nullable=False),
sa.Column('post_id', sa.Integer(), sa.ForeignKey('posts.id', ondelete='CASCADE')),
sa.Column('user_id', sa.Integer(), sa.ForeignKey('users.id')),
sa.Column('created_at', sa.DateTime(), server_default=sa.func.now())
)
# Indexes
op.create_index('ix_posts_slug', 'posts', ['slug'])
op.create_index('ix_posts_author_id', 'posts', ['author_id'])
op.create_index('ix_comments_post_id', 'comments', ['post_id'])
def downgrade():
op.drop_table('comments')
op.drop_table('posts')
op.drop_table('users')
Add Tags Feature:
"""add tags feature
Revision ID: 002
Revises: 001
"""
from alembic import op
import sqlalchemy as sa
def upgrade():
# Tags table
op.create_table(
'tags',
sa.Column('id', sa.Integer(), primary_key=True),
sa.Column('name', sa.String(50), unique=True, nullable=False),
sa.Column('slug', sa.String(50), unique=True, nullable=False)
)
# Post-Tag association table
op.create_table(
'post_tags',
sa.Column('post_id', sa.Integer(), sa.ForeignKey('posts.id', ondelete='CASCADE')),
sa.Column('tag_id', sa.Integer(), sa.ForeignKey('tags.id', ondelete='CASCADE')),
sa.PrimaryKeyConstraint('post_id', 'tag_id')
)
# Indexes
op.create_index('ix_tags_slug', 'tags', ['slug'])
def downgrade():
op.drop_table('post_tags')
op.drop_table('tags')
Example 2: Add Full-Text Search
"""add full text search
Revision ID: 003
Revises: 002
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import TSVECTOR
def upgrade():
# PostgreSQL full-text search
if op.get_context().dialect.name == 'postgresql':
# Add tsvector column — TSVECTOR must be imported from the PostgreSQL
# dialect; it is not reachable as sa.dialects.postgresql via "import sqlalchemy as sa"
op.add_column(
'posts',
sa.Column('search_vector', TSVECTOR())
)
# Create GIN index
op.create_index(
'ix_posts_search_vector',
'posts',
['search_vector'],
postgresql_using='gin'
)
# Create trigger to auto-update search vector
op.execute("""
CREATE TRIGGER posts_search_vector_update
BEFORE INSERT OR UPDATE ON posts
FOR EACH ROW EXECUTE FUNCTION
tsvector_update_trigger(
search_vector, 'pg_catalog.english',
title, content
);
""")
# Update existing rows
op.execute("""
UPDATE posts
SET search_vector = to_tsvector('english', title || ' ' || content);
""")
def downgrade():
if op.get_context().dialect.name == 'postgresql':
op.execute("DROP TRIGGER IF EXISTS posts_search_vector_update ON posts")
op.drop_index('ix_posts_search_vector', 'posts')
op.drop_column('posts', 'search_vector')
Example 3: Data Migration with Transformation
"""migrate user status to enum
Revision ID: 004
Revises: 003
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
def upgrade():
# Create enum type (PostgreSQL)
status_enum = postgresql.ENUM(
'active', 'inactive', 'suspended',
name='user_status',
create_type=False
)
# op.add_column does not emit CREATE TYPE automatically (that only happens
# as part of CREATE TABLE), so create the type explicitly first
status_enum.create(op.get_bind(), checkfirst=True)
# Add new column with enum type
op.add_column(
'users',
sa.Column('status_new', status_enum, nullable=True)
)
# Migrate data
op.execute("""
UPDATE users
SET status_new = CASE
WHEN is_active = true THEN 'active'::user_status
WHEN is_suspended = true THEN 'suspended'::user_status
ELSE 'inactive'::user_status
END
""")
# Make new column non-nullable
op.alter_column('users', 'status_new', nullable=False)
# Drop old columns
op.drop_column('users', 'is_active')
op.drop_column('users', 'is_suspended')
# Rename new column
op.alter_column('users', 'status_new', new_column_name='status')
def downgrade():
# Add old columns back
op.add_column('users', sa.Column('is_active', sa.Boolean()))
op.add_column('users', sa.Column('is_suspended', sa.Boolean()))
# Migrate data back
op.execute("""
UPDATE users
SET is_active = CASE WHEN status = 'active' THEN true ELSE false END,
is_suspended = CASE WHEN status = 'suspended' THEN true ELSE false END
""")
# Drop enum column
op.drop_column('users', 'status')
# Drop enum type
op.execute('DROP TYPE user_status')
Testing Migrations
# tests/test_migrations.py
import pytest
from alembic import command
from alembic.config import Config
from sqlalchemy import create_engine, inspect
@pytest.fixture
def alembic_config(tmp_path):
config = Config('alembic.ini')
# Use a file-backed database: every connection to "sqlite:///:memory:"
# opens its own private database, so a separately created engine would
# never see the schema the migrations produced.
config.set_main_option('sqlalchemy.url', f'sqlite:///{tmp_path}/test.db')
return config
def test_upgrade_downgrade(alembic_config):
"""Test that migrations can be applied and reverted."""
# Upgrade to head
command.upgrade(alembic_config, 'head')
# Verify tables exist — inspect the same database the migrations ran against
engine = create_engine(alembic_config.get_main_option('sqlalchemy.url'))
inspector = inspect(engine)
tables = inspector.get_table_names()
assert 'users' in tables
assert 'posts' in tables
# Downgrade
command.downgrade(alembic_config, 'base')
# Verify tables are gone (re-inspect for a fresh view of the schema)
inspector = inspect(engine)
tables = inspector.get_table_names()
assert 'users' not in tables
def test_migration_reversibility(alembic_config):
"""Test that each migration can be reversed."""
# Get all revisions
command.upgrade(alembic_config, 'head')
# Test each downgrade
current = 'head'
while current != 'base':
command.downgrade(alembic_config, '-1')
# Verify schema is consistent
# ... add checks
Best Practices
1. Always Review Auto-Generated Migrations
# Review and modify auto-generated migrations
def upgrade():
# Auto-generated
op.add_column('users', sa.Column('phone', sa.String()))
# Add manual steps
op.execute("UPDATE users SET phone = '' WHERE phone IS NULL")
op.alter_column('users', 'phone', nullable=False)
2. Include Data Migrations When Needed
def upgrade():
# Schema change
op.add_column('users', sa.Column('full_name', sa.String(100)))
# Data migration
op.execute("""
UPDATE users
SET full_name = first_name || ' ' || last_name
""")
# Make required
op.alter_column('users', 'full_name', nullable=False)
3. Write Reversible Migrations
# Good: Can be reversed
def upgrade():
op.add_column('users', sa.Column('phone', sa.String(20)))
def downgrade():
op.drop_column('users', 'phone')
# Bad: Loses data on downgrade
def upgrade():
op.drop_column('users', 'old_data')
def downgrade():
op.add_column('users', sa.Column('old_data', sa.String()))
# Old data is lost!
4. Use Transactions
def upgrade():
# Alembic already wraps each migration in a transaction on databases with
# transactional DDL (e.g. PostgreSQL), so these operations are atomic:
op.execute("LOCK TABLE users IN EXCLUSIVE MODE")
op.add_column('users', sa.Column('status', sa.String(20)))
op.execute("UPDATE users SET status = 'active'")
# Note: autocommit_block() is the opposite of transactional safety — it
# commits and runs statements OUTSIDE the migration transaction (needed,
# for example, for CREATE INDEX CONCURRENTLY on PostgreSQL).
5. Test Migrations
# Test upgrade
alembic upgrade head
# Test downgrade
alembic downgrade -1
# Test full cycle
alembic downgrade base
alembic upgrade head
6. Document Complex Migrations
"""
Complex data migration: Convert user roles from boolean flags to enum
This migration:
1. Creates new user_role enum type
2. Adds status column with enum type
3. Migrates data from is_admin, is_moderator flags
4. Drops old boolean columns
Important: This migration cannot be easily reversed without data loss.
Backup your data before running in production.
"""
def upgrade():
# ... implementation
pass
Common Issues and Solutions
Issue 1: Alembic Can't Detect Changes
Problem: Auto-generate doesn't detect model changes.
Solution:
# Make sure models are imported in alembic/env.py
from myapp.models import User, Post, Comment # Import all models
from myapp.models import Base
target_metadata = Base.metadata
Issue 2: Circular Dependencies
Problem: Migrations have circular dependencies.
Solution:
# Use branches
alembic revision -m "feature A" --head=head --branch-label=feature_a
alembic revision -m "feature B" --head=head --branch-label=feature_b
# Merge when both are done
alembic merge -m "merge features" heads
Issue 3: Production Database Out of Sync
Problem: Production database schema doesn't match migrations.
Solution:
# Stamp current state
alembic stamp head
# Or stamp specific revision
alembic stamp abc123
Issue 4: Large Table Migrations
Problem: Adding column to large table locks table too long.
Solution:
# Add column as nullable first
def upgrade():
op.add_column('large_table', sa.Column('new_col', sa.String(), nullable=True))
# Update in batches; repeat until no rows remain.
# (UPDATE ... LIMIT is MySQL-only — on PostgreSQL batch via a subquery
# on the primary key, as shown here, or by key ranges.)
op.execute("""
UPDATE large_table
SET new_col = 'default'
WHERE id IN (
SELECT id FROM large_table
WHERE new_col IS NULL
LIMIT 1000
)
""")
# Make not nullable after data is populated
# Run in separate migration
Integration with Applications
Flask Integration
# app.py
from flask import Flask
from flask_sqlalchemy import SQLAlchemy
from flask_migrate import Migrate
app = Flask(__name__)
app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///app.db'
db = SQLAlchemy(app)
migrate = Migrate(app, db)
# Commands available:
# flask db init - Initialize migrations
# flask db migrate - Create migration
# flask db upgrade - Apply migrations
# flask db downgrade - Rollback migrations
FastAPI Integration
# main.py
from fastapi import FastAPI
from alembic.config import Config
from alembic import command
app = FastAPI()
# Note: @app.on_event is deprecated in newer FastAPI releases; prefer the
# "lifespan" context manager for startup/shutdown logic.
@app.on_event("startup")
async def startup_event():
# Run migrations on startup (command.upgrade is synchronous and briefly
# blocks the event loop — acceptable during startup)
alembic_cfg = Config("alembic.ini")
command.upgrade(alembic_cfg, "head")
Django-Style Migrations
# Create migration with custom operations
def upgrade():
# Django-like migration operations
with op.batch_alter_table('users') as batch_op:
batch_op.add_column(sa.Column('status', sa.String(20)))
batch_op.create_index('ix_users_status', ['status'])
Key Takeaways
Additional Resources
Official Documentation:
Tools:
Best Practices:
Next Steps
Database migrations are essential for professional application development. Master them to safely evolve your database schema over time!