Skip to main content

Star /Snowflake Schema

Star and snowflake schemas are two types of data warehouse schemas used for organizing tables and relationships in a data warehouse:

  1. Star Schema:

    • Structure: In a star schema, there is one central "fact" table surrounded by multiple "dimension" tables.
    • Central Table (Fact Table): The fact table contains quantitative data or "facts" about business events or activities.
    • Surrounding Tables (Dimension Tables): Dimension tables provide context or descriptive attributes about the data in the fact table.
    • Relationships: The fact table is connected to the dimension tables through foreign key relationships.
    • Shape: When visualized, the relationships between the fact table and dimension tables resemble a star, with the fact table at the center and dimension tables radiating outward like star points.
  2. Snowflake Schema:

    • Structure: A snowflake schema is an extension of the star schema where dimension tables are further normalized into multiple related tables.
    • Normalization: Dimension tables in a snowflake schema are split into smaller tables to reduce redundancy and improve data integrity.
    • Shape: When visualized, the relationships resemble a snowflake, with the fact table at the center and dimension tables branching out into additional levels of related tables.
  3. Download Sakila

# cd into the unzipped folder containing the sakila files
brew install mysql
mysql_secure_installation
brew services start mysql
mysql -u root
SOURCE sakila-schema.sql;
SOURCE sakila-data.sql;
CREATE USER 'new_user'@'localhost' IDENTIFIED BY 'new_password';
GRANT ALL PRIVILEGES ON sakila.* TO 'new_user'@'localhost';

Snowflake Converting the snowflake schema above to the star schema below Star schema

import mysql.connector

# Connect to MySQL server
conn = mysql.connector.connect(
host="localhost",
user="new_user", # Enter your MySQL username
password="new_password", # Enter your MySQL password
database="sakila" # Enter the name of the database
)
sql_commands = [
"""
CREATE TABLE dimDate (
date_key integer NOT NULL PRIMARY KEY,
date date NOT NULL,
year smallint NOT NULL,
quarter smallint NOT NULL,
month smallint NOT NULL,
day smallint NOT NULL,
week smallint NOT NULL,
is_weekend boolean
);
""",
"""
CREATE TABLE dimCustomer (
customer_key SERIAL PRIMARY KEY,
customer_id smallint NOT NULL,
first_name varchar(45) NOT NULL,
last_name varchar(45) NOT NULL,
email varchar(50),
address varchar(50) NOT NULL,
address2 varchar(50),
district varchar(20) NOT NULL,
city varchar(50) NOT NULL,
country varchar(50) NOT NULL,
postal_code varchar(10),
phone varchar(20) NOT NULL,
active smallint NOT NULL,
create_date timestamp NOT NULL,
start_date date NOT NULL,
end_date date NOT NULL
);
""",
"""
CREATE TABLE dimMovie (
movie_key SERIAL PRIMARY KEY,
film_id smallint NOT NULL,
title varchar(255) NOT NULL,
description text,
release_year year,
language varchar(20) NOT NULL,
original_language varchar(20),
rental_duration smallint NOT NULL,
length smallint NOT NULL,
rating varchar(5) NOT NULL,
special_features varchar(60) NOT NULL
);
""",
"""
CREATE TABLE dimStore (
store_key SERIAL PRIMARY KEY,
store_id smallint NOT NULL,
address varchar(50) NOT NULL,
address2 varchar(50),
district varchar(20) NOT NULL,
city varchar(50) NOT NULL,
country varchar(50) NOT NULL,
postal_code varchar(10),
manager_first_name varchar(45) NOT NULL,
manager_last_name varchar(45) NOT NULL,
start_date date NOT NULL,
end_date date NOT NULL
);
""",
"""
CREATE TABLE factSales (
sales_key SERIAL PRIMARY KEY,
date_key INT NOT NULL REFERENCES dimDate(date_key),
customer_key INT NOT NULL REFERENCES dimCustomer(customer_key),
movie_key INT NOT NULL REFERENCES dimMovie(movie_key),
store_key INT NOT NULL REFERENCES dimStore(store_key),
sales_amount decimal(5,2) NOT NULL
);
"""
]

cursor = conn.cursor()

# Execute each SQL command
for sql_command in sql_commands:
cursor.execute(sql_command)

# Commit the changes
conn.commit()

 sql_commands = [
"""
INSERT INTO dimDate (date_key, date, year, quarter, month, day, week, is_weekend)
SELECT DISTINCT(
CAST(DATE_FORMAT(payment_date, '%Y%m%d') AS UNSIGNED)
) AS date_key,
DATE(payment_date) AS date,
YEAR(payment_date) AS year,
QUARTER(payment_date) AS quarter,
MONTH(payment_date) AS month,
DAY(payment_date) AS day,
WEEK(payment_date) AS week,
CASE
WHEN DAYOFWEEK(payment_date) IN (6, 7) THEN TRUE
ELSE FALSE
END AS is_weekend
FROM payment;
""",
"""
INSERT INTO dimCustomer (customer_key, customer_id, first_name, last_name, email, address, address2, district, city, country, postal_code, phone, active, create_date, start_date, end_date)
SELECT c.customer_id AS customer_key,
c.customer_id,
c.first_name,
c.last_name,
c.email,
a.address,
a.address2,
a.district,
ci.city,
co.country,
a.postal_code,
a.phone,
c.active,
c.create_date,
NOW() AS start_date,
NOW() AS end_date
FROM customer c
JOIN address a ON (c.address_id = a.address_id)
JOIN city ci ON (a.city_id = ci.city_id)
JOIN country co ON (ci.country_id = co.country_id);
""",
"""
INSERT INTO dimMovie (movie_key, film_id, title, description, release_year, language, original_language, rental_duration, length, rating, special_features)
SELECT f.film_id AS movie_key,
f.film_id,
f.title,
f.description,
f.release_year,
l.name AS language,
orig_lang.name AS original_language,
f.rental_duration,
f.length,
f.rating,
f.special_features
FROM film f
JOIN language l ON (f.language_id = l.language_id)
LEFT JOIN language orig_lang ON (f.original_language_id = orig_lang.language_id);
""",
"""
INSERT INTO dimStore (store_key, store_id, address, address2, district, city, country, postal_code, manager_first_name, manager_last_name, start_date, end_date)
SELECT s.store_id AS store_key,
s.store_id,
a.address,
a.address2,
a.district,
c.city,
co.country,
a.postal_code,
st.first_name AS manager_first_name,
st.last_name AS manager_last_name,
NOW() AS start_date,
NOW() AS end_date
FROM store s
JOIN staff st ON (s.manager_staff_id = st.staff_id)
JOIN address a ON (s.address_id = a.address_id)
JOIN city c ON (a.city_id = c.city_id)
JOIN country co ON (c.country_id = co.country_id);
""",
"""
INSERT INTO factSales (date_key, customer_key, movie_key, store_key, sales_amount)
SELECT CAST(DATE_FORMAT(p.payment_date, '%Y%m%d') AS UNSIGNED) AS date_key,
p.customer_id AS customer_key,
i.film_id AS movie_key,
i.store_id AS store_key,
p.amount AS sales_amount
FROM payment p
JOIN rental r ON (p.rental_id = r.rental_id)
JOIN inventory i ON (r.inventory_id = i.inventory_id);
"""
]

for sql_command in sql_commands:
cursor.execute(sql_command)

# Commit the changes
conn.commit()
mysql> select * from  factSales  limit 5;
+-----------+----------+--------------+-----------+-----------+--------------+
| sales_key | date_key | customer_key | movie_key | store_key | sales_amount |
+-----------+----------+--------------+-----------+-----------+--------------+
| 1 | 20050708 | 431 | 1 | 1 | 0.99 |
| 2 | 20050802 | 518 | 1 | 1 | 3.99 |
| 3 | 20050821 | 279 | 1 | 1 | 3.99 |
| 4 | 20050530 | 411 | 1 | 1 | 1.99 |
| 5 | 20050617 | 170 | 1 | 1 | 0.99 |
+-----------+----------+--------------+-----------+-----------+--------------+
5 rows in set (0.00 sec)

mysql>