Mr. Editor-in-chief Mr. Editor-in-chief November 11, 2021 Updated April 24, 2026

Socratica SQL Tutorial (PostgreSQL)

Basic SQL Commands

PostgreSQL MySQL
psql -U postgres                    
\list
\c db_name
\dt
\d table_name
mysql -u root -p                     
show databases;
use db_name;
show tables;
describe table_name;

Get your feet wet

> CREATE DATABASE social_network;
> CREATE TABLE users (
    user_id int,
    first_name VARCHAR(100),
    last_name VARCHAR(100),
    email VARCHAR(255)
  );
> ALTER TABLE users ADD encrypted_password VARCHAR(225);
> ALTER TABLE users DROP COLUMN email;
> DROP TABLE users;
> DROP DATABASE social_network;


> CREATE TABLE movies (
    movie_id SERIAL PRIMARY KEY,
    -- Postgresql's version of MySQL 'movie_id int(11) NOT NULL AUTO_INCREMENT'
    title VARCHAR(100),
    description VARCHAR(100),
    price NUMERIC
 );

> INSERT INTO movies (movie_id, title, description, price) 
  VALUES (1, 'Gattaca', 'Movie or documentary?', 4.99);
> SELECT * FROM movies;
> SELECT title FROM movies;

> INSERT INTO movies (title, price) VALUES ('Star Wars', 8.99);
> INSERT INTO movies (title, price) VALUES (E'Logan\'s Run', 3.99);
> INSERT INTO movies (title, price) VALUES ('Solaris', 2.99);
> INSERT INTO movies (title, price) VALUES ('Jaws', 5.25);
> INSERT INTO movies (title, price) VALUES ('Silent Running', 3.00);

> SELECT title, price FROM movies;
> SELECT title, price FROM movies ORDER BY price;
> SELECT title, price FROM movies ORDER BY price DESC;

> UPDATE movies SET price = 0.99 WHERE title = 'Jaws';
> SELECT title, price FROM movies ORDER BY price;

> DELETE FROM movies WHERE title = 'Star Wars';
> SELECT title, price FROM movies ORDER BY price;

SQL SELECT

SELECT column1, column2, ...
FROM table1
WHERE condition1 AND/OR condition2 ...
ORDER BY columni (ASC | DESC)
LIMIT n;

-- pgAdmin: Serves -> [Server Name] -> Databases -> [Database Name] -> Query Tool
> CREATE TABLE earthquake(
      earthquake_id SERIAL PRIMARY KEY,
      occurred_on TIMESTAMP WITHOUT TIME ZONE,
      latitude NUMERIC,
      longitude NUMERIC,
      depth NUMERIC,
      magnitude NUMERIC,
      calculation_method CHARACTER VARYING,
      network_id CHARACTER VARYING,
      place CHARACTER VARYING,
      cause CHARACTER VARYING
  );
-- Execute (F5)
-- Right-click the 'earthquake' table
-- Import/Export Data (make sure the 'Options -> Header' switch to 'Yes' before come back to 'General') -> Filename -> Select file -> '...(Options) -> Upload'

> SELECT * FROM earthquake;
> SELECT COUNT(*) FROM earthquake;
> SELECT place, magnitude, occurred_on FROM earthquake;
> SELECT * FROM earthquake WHERE occurred_on >= '2000-01-01';

-- What was the largest earthquake in 2010?
> SELECT * FROM earthquake WHERE occurred_on >= '2010-01-01' and occurred_on < '2020-12-31'
  ORDER BY magnitude DESC LIMIT 1;

-- SQL Functions: COUNT, MIN, MAX, AVG, SUM
> SELECT MIN(occurred_on), MAX(occurred_on) FROM earthquake;

-- What magnitude range is covered by the 'earthquake' table?
> SELECT MIN(magnitude), MAX(magnitude) FROM earthquake;

> SELECT cause FROM earthquake;
> SELECT DISTINCT cause FROM earthquake;

-- How many earthquakes are natural earthquakes
> SELECT COUNT(*) FROM earthquake WHERE cause = 'earthquake';
> SELECT COUNT(*) FROM earthquake WHERE cause = 'nuclear explosion';
> SELECT COUNT(*) FROM earthquake WHERE cause = 'explosion';

-- What is the most recent earthquake caused by nuclear explosion?
> SELECT place, magnitude, occurred_on FROM earthquake 
  WHERE cause = 'nuclear explosion'
  ORDER BY occurred_on DESC
  LIMIT 1;

-- What are the 10 largest earthquakes from 1969 - 2018?
> SELECT place, magnitude, occurred_on FROM earthquake 
  WHERE occurred_on >= '1969-01-01' AND occurred_on <= '2018-12-31'
  ORDER BY magnitude DESC
  LIMIT 10;

-- How can we count the number of aftershocks?
-- Idea: Find quakes with 'Honshu' and 'Japan' in the 'place' text & occurred within 
-- a week of the initial quake
> SELECT COUNT(*) FROM earthquake 
  WHERE place LIKE '%Honshu%Japan%' AND occurred_on BETWEEN '2011-03-11' AND '2011-03-18';

SQL INSERT

> CREATE TABLE chitter_user
  (
      user_id SERIAL PRIMARY KEY,
      username TEXT,
      encrypted_password TEXT,
      email TEXT,
      date_joined TIMESTAMP WITHOUT TIME ZONE
  );

> CREATE TABLE post
  (
      post_id SERIAL PRIMARY KEY,
      user_id INT,
      post_text TEXT,
      posted_on TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP,
      CONSTRAINT user_id_constraint FOREIGN KEY (user_id)
          REFERENCES chitter_user (user_id) MATCH SIMPLE
          ON UPDATE CASCADE
          ON DELETE CASCADE
  );

> INSERT INTO chitter_user (user_id, username, encrypted_password, email , date_joined)
  VALUES (DEFAULT, 'firstuser', 'd63dc919e2dc30d2', 'fakemail@fakedomain.fake', '2019-02-25');
> INSERT INTO chitter_user (username, encrypted_password) VALUES ('seconduser', '9a834yva9fn3493yn');
> SELECT * FROM chitter_user;

> INSERT INTO post (user_id, post_text) 
  VALUES 
    (1, 'Hello World!'),
    (1, 'Hello Solar System');
> SELECT * FROM post;

-- Note: Insert multiple rows in one statement is more efficient than insert one row at a time.

SQL UPDATE

UPDATE table_name
SET column1 = value1, column2 = value2, ...
WHERE condition1 AND condition2;

> CREATE TABLE IF NOT EXISTS secret_user (
  user_id SERIAL PRIMARY KEY,
  first_name VARCHAR(20),
  last_name VARCHAR(20),
  code_name VARCHAR(20),
  country VARCHAR(20),
  organization VARCHAR(10),
  salary INT,
  knows_kung_fu BOOL
  );

> INSERT INTO secret_user
  (first_name, last_name, code_name, country, organization, salary, knows_kung_fu)
  VALUES
  ('Jimmy', 'Bond', '007', 'United Kingdom', 'MI6', 97200, false),
  ('George', 'Smiley', 'Beggarman', 'United Kingdom', 'MI6', 97200, false),
  ('Jason', 'Bourne', 'Delta One', 'United States', 'CIA', 115000, false),
  ('Jack', 'Ryan', null, 'United States', 'CIA', 85000, false),
  ('Ethan', 'Hunt', 'Bravo Echo 1-1', 'United States', 'IMF', 250000, false),
  ('Emma', 'Peel', 'Mrs. Peel', 'United Kingdom', 'MI6', 97200, true),
  ('Susan', 'Hilton', 'Agent 99', 'United States', 'Control',250000 , false),
  ('Nick', 'Fury', 'Foxtrol', 'United States', 'SHIELD', 250000, false);

> SELECT * FROM secret_user;
> UPDATE secret_user SET first_name = 'James' WHERE user_id = 1;
> SELECT * FROM secret_user ORDER BY user_id;

> UPDATE secret_user SET code_name = 'Neo 2.0', salary = 115000
  WHERE first_name = 'Jack' AND last_name = 'Ryan';
> SELECT * FROM secret_user ORDER BY user_id;

> UPDATE secret_user SET salary = 115000
  WHERE organization = 'MI6';
> SELECT * FROM secret_user ORDER BY user_id;

> UPDATE secret_user SET knows_kung_fu = TRUE WHERE user_id in (5, 7, 8);
> SELECT * FROM secret_user ORDER BY user_id;

-- Give every agent a 10% salary raise
> UPDATE secret_user SET salary = salary * 1.10;
> SELECT * FROM secret_user ORDER BY user_id;

> SELECT SUM(salary) FROM secret_user;

SQL DELETE

DELETE FROM table_name 
WHERE condition(s);

> CREATE TABLE IF NOT EXISTS song(
  song_id SERIAL PRIMARY KEY, 
  title TEXT,
  artist TEXT,
  album TEXT,
  year_released INT,
  duration NUMERIC,
  tempo NUMERIC,
  loudness NUMERIC
  );

-- Import song.csv

> SELECT MIN(year_released), MAX(year_released) FROM song;
> SELECT DISTINCT year_released FROM song ORDER BY year_released;
> SELECT COUNT(*) FROM song WHERE year_released = 0;
> DELETE FROM song WHERE year_released = 0;
> SELECT COUNT(*) FROM song WHERE year_released = 0;
> SELECT COUNT(*) FROM song;

> SELECT MIN(tempo), MAX(tempo) FROM song;
> SELECT COUNT(*) FROM song WHERE tempo = 0;
> DELETE FROM song WHERE tempo = 0;
> SELECT COUNT(*) FROM song WHERE tempo = 0;
> SELECT MIN(tempo), MAX(tempo) FROM song;
> SELECT COUNT(*) FROM song;

> SELECT MIN(duration), MAX(duration) FROM song;

> SELECT MIN(loudness), MAX(loudness) FROM song;
> SELECT COUNT(*) FROM song WHERE loudness > 0;
> DELETE FROM song WHERE loudness > 0;
> SELECT MIN(loudness), MAX(loudness) FROM song;

-- Has tempo changed over time?
> SELECT year_released, AVG(tempo)
  FROM song
  GROUP BY year_released
  ORDER BY year_released;

> SELECT year_released, ROUND(AVG(tempo))
  FROM song
  GROUP BY year_released
  ORDER BY year_released;

SQL JOINS

SELECT column1, column2, t1.name, t2,name, ...
FROM left_table as t1
____ JOIN right_table as t2
ON t1.column1 = t2.column2
WHERE condition(s)
ORDER BY value;

-- INNER JOIN only returns connected, matching rows
-- LEFT JOIN returns all connected rows, and unconnected rows from left table (null in right)
-- RIGHT JOIN returns all connected rows, and unconnected row from right table (null in left)
-- FULL(OUTER) JOIN returns connected and unconnected rows from both left & right tabls



> CREATE TABLE IF NOT EXISTS martian (
      martian_id SERIAL PRIMARY KEY,
      first_name VARCHAR(20),
      last_name VARCHAR(20),
      base_id INT,
      super_id INT
  );

> CREATE TABLE base (
      base_id SERIAL PRIMARY KEY,
      base_name VARCHAR(50),
      founded DATE
  );

> INSERT INTO martian
      (first_name, last_name, base_id, super_id)
  VALUES  ('Ray', 'Bradbury', 1, null),
          ('John', 'Black', 4, 10),
          ('Samuel', 'Hinkston', 4, 2),
          ('Jeff', 'Spender', 1, 9),
          ('Sam', 'Parkhill', 2, 12),
          ('Elma', 'Parkhill', 3, 8),
          ('Melissa', 'Lewis', 1, 1),
          ('Mark', 'Watney', 3, null),
          ('Beth', 'Johanssen', 1, 1),
          ('Chris', 'Beck', 4, null),
          ('Nathaniel', 'York', 4, 2),
          ('Elon', 'Musk', 2, null),
          ('John', 'Carter', null, 8);

> INSERT INTO base (base_name, founded)
  VALUES ('Tharsisland', '2037-06-03'), 
          ('Valles Marineris 2.0', '2040-12-01'),
          ('Gale Cratertown', '2041-08-15'),
          ('New New New York', '2042-02-10'),
          ('Olympus Mons Spa & Casino', null);

> SELECT * 
  FROM martian  -- left table
  INNER JOIN base -- right table
  ON martian.base_id = base.base_id;

> SELECT first_name, last_name, base_name
  FROM martian 
  INNER JOIN base
  ON martian.base_id = base.base_id;

> SELECT first_name, last_name, base_name
  FROM martian 
  LEFT JOIN base
  ON martian.base_id = base.base_id;

> SELECT first_name, last_name, base_name
  FROM martian 
  RIGHT JOIN base
  ON martian.base_id = base.base_id;

-- Using table aliases
> SELECT m.first_name, m.last_name, b.base_id, b.base_name
  FROM martian as m
  FULL JOIN base as b
  ON m.base_id = b.base_id;


> CREATE TABLE visitor (
      visitor_id SERIAL PRIMARY KEY,
      host_id INT,
      first_name VARCHAR(20),
      last_name VARCHAR(20)
  );

> INSERT INTO visitor (host_id, first_name, last_name)
  VALUES (7, 'George', 'Ambrose'),
          (1, 'Kris', 'Cardenas'),
          (9, 'Priscilla', 'Lane'),
          (11, 'Jane', 'Thornton'),
          (null, 'Doug', 'Stavenger'),
          (null, 'Jamie', 'Waterman'),
          (8, 'Martin', 'Humphries');

> CREATE TABLE inventory (
      base_id INT,
      supply_id INT,
      quantity INT
  );

> INSERT INTO inventory (base_id, supply_id, quantity)
  VALUES (1, 1, 8),
          (1, 3, 5),
          (1, 5, 1),
          (1, 6, 2),
          (1, 8, 12),
          (1, 9, 1),
          (2, 4, 5),
          (2, 8, 62),
          (2, 10, 37),
          (3, 2, 11),
          (3, 7, 2),
          (4, 10, 91);

> CREATE TABLE supply (
      supply_id SERIAL PRIMARY KEY,
      name VARCHAR(30),
      description VARCHAR(255),
      quantity INT
  );




> INSERT INTO supply (name, description, quantity)
  VALUES 
  ('Solar Panel', 'Standard 1x1 meter cell', 912),
  ('Water Filter', E'This takes things out of your water so it\'s drinkable.', 6),
  ('Duct Tape', 'A 10 meter roll of duct tape for ALL your repaires.', 951),
  ('Ketchup', E'It\'s ketchup...', 206),
  ('Battery Cell', 'Standard 1000 kAh battery cell for power grid (heavy item).', 17),
  ('USB 6.0 Cable', 'Carbon fiber coated / 15 TBps spool', 42),
  ('Fuzzy Duster', 'It gets dusty around here. Be prepared!', 19),
  ('Mars Bars', 'The ORIGINAL nutrient bar made with the finest bioengineered ingredients.', 3801),
  ('Air Filter', 'Removes 99% of all Martian dust from your ventilation unit.', 23),
  (E'Famous Ray\'s Frozen Pizza', 'This Martian favorite is covered in all your favorite toppings. 1 flavor only.', 823);

-- Display name of each visitor & show name of visitor's host 
> SELECT v.first_name AS visitor_fn, v.last_name AS visitor_ln, 
  m.first_name AS martian_fn, m.last_name AS martian_ln
  FROM visitor AS v
  LEFT JOIN martian AS m
  ON v.host_id = m.martian_id;

-- Display list of each Martian and the person they report to
> SELECT m.first_name AS fn, m.last_name AS ln, s.first_name AS super_fn, s.last_name AS super_ln
  FROM martian AS m
  LEFT JOIN martian AS s
  ON m.super_id = s.martian_id
  ORDER BY m.martian_id;

-- Inventory for Base #1: name of supplies, quantity, items both in stock and out of stock
> SELECT * FROM inventory WHERE base_id = 1;

> SELECT s.supply_id, COALESCE(i.quantity, 0) AS quantity, s.name, s.description
  FROM (SELECT * FROM inventory WHERE base_id = 1) AS i
  RIGHT JOIN supply AS s
  ON i.supply_id = s.supply_id
  ORDER BY s.supply_id;

-- Display list of visitors without host and martians available to do the hosting
> SELECT v.first_name AS visitor_fn, v.last_name AS visitor_ln,
        m.first_name AS martian_fn, m.last_name AS martian_ln
  FROM visitor as v
  FULL JOIN martian as m
  ON v.host_id = m.martian_id
  WHERE v.host_id IS null OR v.visitor_id IS null;

SQL VIEWS

> CREATE TABLE martian_confidential (
      martian_id SERIAL PRIMARY KEY,
      first_name VARCHAR(20),
      last_name VARCHAR(20),
      base_id INT,
      super_id INT,
      salary INT,
      dna_id VARCHAR(11)
  );

> INSERT INTO martian_confidential
      (first_name, last_name, base_id, super_id, salary, dna_id)
  VALUES  ('Ray', 'Bradbury', 1, null, 155900, 'ayyxqrkrmhr'),
          ('John', 'Black', 4, 10, 120100, 'hofikijmsqx'),
          ('Samuel', 'Hinkston', 4, 2, 110000, 'eiozffxtexn'),
          ('Jeff', 'Spender', 1, 9, 10000, 'byjfdafumby'),
          ('Sam', 'Parkhill', 2, 12, 125000, 'hwteazassov'),
          ('Elma', 'Parkhill', 3, 8, 137000, 'fgprhclobcm'),
          ('Melissa', 'Lewis', 1, 1, 145250, 'dpzyrqmqtth'),
          ('Mark', 'Watney', 3, null, 121100, 'gqgixxnayyj'),
          ('Beth', 'Johanssen', 1, 1, 130000, 'njtpyekmaoo'),
          ('Chris', 'Beck', 4, null, 125000, 'kphtalsfwma'),
          ('Nathaniel', 'York', 4, 2, 105000, 'drunnydnfsr'),
          ('Elon', 'Musk', 2, null, 155800, 'lbwicmztoxa'),
          ('John', 'Carter', null, 8, 129500, 'zecqwjengec');

> CREATE VIEW martian_public AS 
> SELECT martian_id, first_name, last_name, base_id
  FROM martian_confidential;

> SELECT * FROM martian_public;

> CREATE VIEW people_on_mars AS
  SELECT CONCAT('M', martian_id) AS id, first_name, last_name, 'Martian' AS status
  FROM martian_public
    UNION
  SELECT CONCAT('V', visitor_id) AS id, first_name, last_name, 'Visitor' AS status
  FROM visitor;

> SELECT * FROM people_on_mars;

> CREATE VIEW base_storage AS 
  SELECT b.base_id, s.supply_id, s.name,
    COALESCE((SELECT quantity FROM inventory WHERE base_id = b.base_id AND supply_id = s.supply_id), 0) AS quantity
  FROM base AS b
  CROSS JOIN supply AS s
  ORDER BY base_id, supply_id;

> SELECT * FROM base_storage;

SQL INDEX

# Python Script to create and insert a table of 10, 000, 000 rows
# pip install SQLAlchemy psycopg2
from sqlalchemy import create_engine, text
import csv 
from itertools import islice
import pandas as pd

uri = 'postgresql://user:pass@localhost:5432/database_name'
pg_engine = create_engine(uri)
with pg_engine.connect() as con:
    query_str = text('''
    CREATE TABLE IF NOT EXISTS person(
    person_id SERIAL PRIMARY KEY,
    first_name VARCHAR(11),
    last_name VARCHAR(11),
    birthday DATE
    );
    ''')
    con.execute(query_str)

with open('female_names.csv', encoding='utf-8') as f1, \
     open('male_names.csv', encoding='utf-8') as f2, \
     open('last_names.csv', encoding='utf-8') as f3:
        female_names = csv.reader(f1)
        female_names = [row[1] for row in female_names]

        male_names = csv.reader(f2)
        male_names = [row[1] for row in male_names]

        last_names = csv.reader(f3)
        last_names = [row[1] for row in last_names]

print(len(female_names), len(male_names), len(last_names))
print(100_000_000 // 1_000_000)


full_female_names = [{'first_name': fn, 'last_name': ln, 'birthday': '1980-01-01'} for fn in female_names for ln in last_names]
full_male_names = [{'first_name': fn, 'last_name': ln, 'birthday': '1980-01-01'} for fn in male_names for ln in last_names]


for i in range(50):
    iterator = iter(full_female_names)
    while chunk := list(islice(iterator, 100000)):
        batch_no = 0
        df = pd.DataFrame(chunk)
        df.to_sql('person', con=uri, if_exists='append', index=False)

    iterator = iter(full_male_names)
    while chunk := list(islice(iterator, 100000)):
        batch_no = 0
        df = pd.DataFrame(chunk)
        df.to_sql('person', con=uri, if_exists='append', index=False)
> SELECT COUNT(*) FROM person;
> SELECT COUNT(*) FROM person WHERE last_name = 'Smith';
> SELECT COUNT(*) FROM person WHERE first_name = 'Emma';
> SELECT COUNT(*) FROM person WHERE last_name in ('Harkins', 'Snow');

> CREATE INDEX person_first_name_idx 
  ON person (first_name);
> SELECT COUNT(*) FROM person WHERE first_name = 'Emma';
> SELECT COUNT(*) FROM person WHERE first_name = 'David';
> SELECT COUNT(*) FROM person WHERE last_name = 'Smith';
> SELECT COUNT(*) FROM person WHERE first_name = 'Julie' AND last_name = 'Andrews';
> SELECT COUNT(*) FROM person WHERE birthday = '1980-01-01' AND first_name = 'Mia';

> CREATE INDEX person_first_name_last_name_idx
  ON person (last_name, first_name);
> SELECT COUNT(*) FROM person WHERE last_name = 'Williams' AND first_name = 'John';