visit
Quite often in my practice, I see people use varchar
fields with a small and limited number of available options.
To begin with our database, we will start with the main table post. I'm now omitting the author
field because this is our blog, and we don't have any plans to add additional authors except for us ;)
CREATE TABLE post (
id INT NOT NULL AUTO_INCREMENT,
title VARCHAR(255) NOT NULL,
status ENUM ('archived', 'deleted', 'draft', 'published') DEFAULT 'draft',
content TEXT NOT NULL,
created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (id),
KEY (status)
)
ENGINE = InnoDB;
I can imagine three ways how to describe the status
field.
status VARCHAR(255) NOT NULL default 'draft'
status ENUM ('archived', 'deleted', 'draft', 'published') DEFAULT 'draft',
status TINYINT(1) UNSIGNED NOT NULL DEFAULT '0'
varchar_score = 0
enum_score = 0
integer_score = 0
varchar_score += 1
enum_score +=1
Internal value | DisplayedValue |
---|---|
null | null |
0 | '' |
1 | 'archived' |
2 | 'deleted' |
3 | 'draft' |
4 | 'published' |
enum('draft', 'published', 'deleted', 'archived')
Ordering of the data will work based on internal representation - first will go values with draft
next - deleted
, draft
, and published
. This behavior is confusing the first time until you realize the reason for that. The correct order of options in the enum should match your expected sorting behavior.
ENUM ('archived', 'deleted', 'draft', 'published')
For varchar, it's a bit more complicated. It will depend on the character set. For example, latin1
will take one byte per character, and utf8mb4
can require up to four bytes per character. Plus one byte to record the length of the string.
The set of statuses archived
, deleted
, draft
, and published
will take from 6 to 10 bytes in the latin1
character set.
enum_score += 1
integer_score +=1
varchar_score += 1
integer_score +=1
docker run --name blog_mysql -e MYSQL_ROOT_PASSWORD=secret -e MYSQL_DATABASE=blog -d mysql:8
docker exec -it blog_mysql mysql -u root -psecret blog
Creating three very similar tables. The only difference will be in the status
field.
CREATE TABLE post_enum (
id INT NOT NULL AUTO_INCREMENT,
title VARCHAR(255) NOT NULL,
status ENUM ('archived', 'deleted', 'draft', 'published') DEFAULT 'draft',
content TEXT NOT NULL,
created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (id),
KEY (status)
)
ENGINE = InnoDB
COLLATE utf8_bin;
CREATE TABLE post_varchar (
id INT NOT NULL AUTO_INCREMENT,
title VARCHAR(255) NOT NULL,
status VARCHAR(10) DEFAULT 'draft',
content TEXT NOT NULL,
created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (id),
KEY (status)
)
ENGINE = InnoDB
COLLATE utf8_bin;
CREATE TABLE post_int (
id INT NOT NULL AUTO_INCREMENT,
title VARCHAR(255) NOT NULL,
status TINYINT(1) NOT NULL DEFAULT 0,
content TEXT NOT NULL,
created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (id),
KEY (status)
)
ENGINE = InnoDB
COLLATE utf8_bin;
SHOW TABLES;
+----------------+
| Tables_in_blog |
+----------------+
| post_enum |
| post_int |
| post_varchar |
+----------------+
DROP PROCEDURE IF EXISTS add_post_varchar;
DROP PROCEDURE IF EXISTS add_post_enum;
DROP PROCEDURE IF EXISTS add_post_int;
DELIMITER $$
CREATE PROCEDURE add_post_varchar(number_of_posts INT)
BEGIN
DECLARE loop_counter INT UNSIGNED DEFAULT 0;
DECLARE post_content TEXT DEFAULT 'Lorem Ipsum is simply dummy text of the printing and typesetting industry.';
DECLARE post_title VARCHAR(255) DEFAULT 'Lorem Ipsum';
WHILE loop_counter < number_of_posts
DO
INSERT
INTO
post_varchar (title, status, content)
SELECT
post_title,
ELT(0.5 + RAND() * 4, 'archived', 'deleted', 'draft', 'published'),
post_content;
SET loop_counter = loop_counter + 1;
END WHILE;
END $$
CREATE PROCEDURE add_post_enum(number_of_posts INT)
BEGIN
DECLARE loop_counter INT UNSIGNED DEFAULT 0;
DECLARE post_content TEXT DEFAULT 'Lorem Ipsum is simply dummy text of the printing and typesetting industry.';
DECLARE post_title VARCHAR(255) DEFAULT 'Lorem Ipsum';
WHILE loop_counter < number_of_posts
DO
INSERT
INTO
post_enum (title, status, content)
SELECT
post_title,
ELT(0.5 + RAND() * 4, 'archived', 'deleted', 'draft', 'published'),
post_content;
SET loop_counter = loop_counter + 1;
END WHILE;
END $$
CREATE PROCEDURE add_post_int(number_of_posts INT)
BEGIN
DECLARE loop_counter INT UNSIGNED DEFAULT 0;
DECLARE post_content TEXT DEFAULT 'Lorem Ipsum is simply dummy text of the printing and typesetting industry.';
DECLARE post_title VARCHAR(255) DEFAULT 'Lorem Ipsum';
WHILE loop_counter < number_of_posts
DO
INSERT
INTO
post_int (title, status, content)
SELECT
post_title,
ELT(0.5 + RAND() * 4, '1', '2', '3', '4'),
post_content;
SET loop_counter = loop_counter + 1;
END WHILE;
END $$
DELIMITER ;
call add_post_int(200000);
call add_post_enum(200000);
call add_post_varchar(200000);
SELECT COUNT(*), status FROM post_enum GROUP BY status
UNION
SELECT COUNT(*), status FROM post_int GROUP BY status
UNION
SELECT count(*), status FROM post_varchar GROUP BY status;
+----------+-----------+
| COUNT(*) | status |
+----------+-----------+
| 50058 | archived |
| 50229 | deleted |
| 50273 | draft |
| 49440 | published |
| 50123 | 1 |
| 49848 | 2 |
| 49642 | 3 |
| 50387 | 4 |
| 49885 | archived |
| 49974 | deleted |
| 50060 | draft |
| 50081 | published |
+----------+-----------+
SET profiling=1;
SELECT SQL_NO_CACHE COUNT(*), status FROM post_enum GROUP BY status;
SELECT SQL_NO_CACHE COUNT(*), status FROM post_varchar GROUP BY status;
SELECT SQL_NO_CACHE COUNT(*), status FROM post_int GROUP BY status;
Query_ID | Duration | Query |
---|---|---|
1 | 0.05163150 | SELECT SQL_NO_CACHE COUNT(*), status FROM post_enum GROUP BY status |
2 | 0.05172150 | SELECT SQL_NO_CACHE COUNT(*), status FROM post_varchar GROUP BY status |
3 | 0.05893025 | SELECT SQL_NO_CACHE COUNT(*), status FROM post_int GROUP BY status |
varchar_score += 1
enum_score += 1
integer_score += 1
Option | Score |
---|---|
integer_score | 3 |
varchar_score | 3 |
enum_score | 3 |