1. 使用SQLite建立数据库并导入数据

In [ ]:
import sqlite3
import pandas as pd

# 创建内存中的SQLite数据库
conn = sqlite3.connect(':memory:')

# 创建Cursor对象
cur = conn.cursor()

# 创建transactions表
cur.execute('''
CREATE TABLE transactions (
    transaction_id INT PRIMARY KEY,
    date DATE,
    amount DECIMAL(10, 2),
    description VARCHAR(255),
    account_id INT
)
''')

# 插入数据到transactions表
cur.executemany('''
INSERT INTO transactions (transaction_id, date, amount, description, account_id) VALUES (?, ?, ?, ?, ?)
''', [
    (1, '2023-01-01', 50.00, 'Office Supplies', 101),
    (2, '2023-01-05', 300.00, 'Software Purchase', 102),
    (3, '2023-01-10', 1500.00, 'Equipment Maintenance', 103),
    (4, '2023-01-15', 75.00, 'Travel Expenses', 104),
    (5, '2023-01-20', 450.00, 'Marketing Campaign', 101),
    (6, '2023-01-25', 1200.00, 'Conference Fees', 105),
    (7, '2023-02-01', 200.00, 'Subscription Service', 106),
    (8, '2023-02-05', 95.00, 'Utility Bill', 107),
    (9, '2023-02-10', 400.00, 'Equipment Purchase', 101),
    (10, '2023-02-15', 700.00, 'Office Renovation', 108),
    (11, '2023-02-20', 30.00, 'Stationery', 102),
    (12, '2023-02-25', 500.00, 'Legal Fees', 103)
])

# 创建accounts表
cur.execute('''
CREATE TABLE accounts (
    account_id INT PRIMARY KEY,
    account_name VARCHAR(255),
    account_type VARCHAR(50)
)
''')

# 插入数据到accounts表
cur.executemany('''
INSERT INTO accounts (account_id, account_name, account_type) VALUES (?, ?, ?)
''', [
    (101, 'General Expenses', 'Expense'), # 一般费用账户/费用账户,记录各种运营支出
    (102, 'IT Department', 'Department'), # IT部门账户/部门账户,记录特定部门(如IT部门)的费用和支出
    (103, 'Maintenance', 'Service'), # 维护账户/服务账户,记录与外部服务相关的费用,如维护费、订阅服务费等
    (104, 'Travel', 'Expense'), # 差旅费用账户/费用账户,记录各种运营支出
    (105, 'Conferences', 'Event'), # 会议费用账户/事件账户,记录特定事件或活动的费用,如会议费用。
    (106, 'Subscriptions', 'Service'), # 订阅服务账户/服务账户,记录与外部服务相关的费用,如维护费、订阅服务费等
    (107, 'Utilities', 'Expense'), # 公用事业费用账户/费用账户,记录水电气等公共设施的费用
    (108, 'Renovations', 'Project') # 装修项目账户/项目账户,记录特定项目的费用,如装修项目
])

# 查询数据
cur.execute('SELECT * FROM transactions')
rows = cur.fetchall()

# 使用pandas展示结果
df = pd.DataFrame(rows, columns=['transaction_id', 'date', 'amount', 'description', 'account_id'])
print(df)

# 关闭连接
# conn.close()
    transaction_id        date  amount            description  account_id
0                1  2023-01-01      50        Office Supplies         101
1                2  2023-01-05     300      Software Purchase         102
2                3  2023-01-10    1500  Equipment Maintenance         103
3                4  2023-01-15      75        Travel Expenses         104
4                5  2023-01-20     450     Marketing Campaign         101
5                6  2023-01-25    1200        Conference Fees         105
6                7  2023-02-01     200   Subscription Service         106
7                8  2023-02-05      95           Utility Bill         107
8                9  2023-02-10     400     Equipment Purchase         101
9               10  2023-02-15     700      Office Renovation         108
10              11  2023-02-20      30             Stationery         102
11              12  2023-02-25     500             Legal Fees         103

2. 筛选特定日期范围内的交易, 并按交易金额从高到低排序

In [ ]:
sql = '''
    SELECT * FROM transactions
    WHERE date BETWEEN '2023-01-01' AND '2023-01-31'
    ORDER BY amount DESC;
    '''

# 查询数据
cur.execute(sql)
rows = cur.fetchall()

# 使用pandas展示结果
df = pd.DataFrame(rows, columns=['transaction_id', 'date', 'amount', 'description', 'account_id'])
print(df)
   transaction_id        date  amount            description  account_id
0               3  2023-01-10    1500  Equipment Maintenance         103
1               6  2023-01-25    1200        Conference Fees         105
2               5  2023-01-20     450     Marketing Campaign         101
3               2  2023-01-05     300      Software Purchase         102
4               4  2023-01-15      75        Travel Expenses         104
5               1  2023-01-01      50        Office Supplies         101

3. 聚合函数和分组

(1)计算每个账户的总交易金额
In [ ]:
sql = '''
    SELECT account_id, SUM(amount) as total_amount
    FROM transactions
    GROUP BY account_id;
    '''

# 查询数据
cur.execute(sql)
rows = cur.fetchall()

# 使用pandas展示结果
df = pd.DataFrame(rows, columns=['account_id', 'total_amount'])
print(df)
   account_id  total_amount
0         101           900
1         102           330
2         103          2000
3         104            75
4         105          1200
5         106           200
6         107            95
7         108           700
(2)统计每个月的交易次数
In [ ]:
sql = '''
    SELECT strftime('%Y-%m', date) as month, COUNT(*) as transaction_count
    FROM transactions
    GROUP BY month;
    '''

# 查询数据
cur.execute(sql)
rows = cur.fetchall()

# 使用pandas展示结果
df = pd.DataFrame(rows, columns=['month', 'transaction_count'])
print(df)
     month  transaction_count
0  2023-01                  6
1  2023-02                  6

4. 使用CASE WHEN进行条件查询

(1)根据交易金额分类
In [ ]:
sql = '''
    SELECT transaction_id, amount,
    CASE 
        WHEN amount < 100 THEN 'Small'
        WHEN amount BETWEEN 100 AND 500 THEN 'Medium'
        ELSE 'Large'
    END as amount_category
    FROM transactions;
    '''

# 查询数据
cur.execute(sql)
rows = cur.fetchall()

# 使用pandas展示结果
df = pd.DataFrame(rows, columns=['transaction_id', 'amount', 'amount_category'])
print(df)
    transaction_id  amount amount_category
0                1      50           Small
1                2     300          Medium
2                3    1500           Large
3                4      75           Small
4                5     450          Medium
5                6    1200           Large
6                7     200          Medium
7                8      95           Small
8                9     400          Medium
9               10     700           Large
10              11      30           Small
11              12     500          Medium
(2)计算各分类的总金额
In [ ]:
sql = '''
    SELECT 
    CASE 
        WHEN amount < 100 THEN 'Small'
        WHEN amount BETWEEN 100 AND 500 THEN 'Medium'
        ELSE 'Large'
    END as amount_category,
    SUM(amount) as total_amount
    FROM transactions
    GROUP BY amount_category;
    '''

# 查询数据
cur.execute(sql)
rows = cur.fetchall()

# 使用pandas展示结果
df = pd.DataFrame(rows, columns=['amount_category', 'amount_category'])
print(df)
   amount_category  amount_category
0            Large             3400
1           Medium             1850
2            Small              250

5. 左外连接 Left Outer Join

(1)查询每笔交易对应的账户名称
In [ ]:
# 表间连接方式(等价于内连接)
def fun_1():
    sql = '''
        SELECT t.transaction_id, t.date, t.amount, a.account_name
        FROM transactions t, accounts a
        WHERE t.account_id = a.account_id;
        '''

    # 查询数据
    cur.execute(sql)
    rows = cur.fetchall()

    # 使用pandas展示结果
    df = pd.DataFrame(rows, columns=['transaction_id', 'date', 'amount', 'account_name'])
    print(df)

fun_1()
    transaction_id        date  amount      account_name
0                1  2023-01-01      50  General Expenses
1                2  2023-01-05     300     IT Department
2                3  2023-01-10    1500       Maintenance
3                4  2023-01-15      75            Travel
4                5  2023-01-20     450  General Expenses
5                6  2023-01-25    1200       Conferences
6                7  2023-02-01     200     Subscriptions
7                8  2023-02-05      95         Utilities
8                9  2023-02-10     400  General Expenses
9               10  2023-02-15     700       Renovations
10              11  2023-02-20      30     IT Department
11              12  2023-02-25     500       Maintenance
In [ ]:
# 左外连接方式
def fun_2():
    sql = '''
        SELECT t.transaction_id, t.date, t.amount, a.account_name
        FROM transactions t
        LEFT OUTER JOIN accounts a ON t.account_id = a.account_id;
        '''

    # 查询数据
    cur.execute(sql)
    rows = cur.fetchall()

    # 使用pandas展示结果
    df = pd.DataFrame(rows, columns=['transaction_id', 'date', 'amount', 'account_name'])
    print(df)
fun_2()
    transaction_id        date  amount      account_name
0                1  2023-01-01      50  General Expenses
1                2  2023-01-05     300     IT Department
2                3  2023-01-10    1500       Maintenance
3                4  2023-01-15      75            Travel
4                5  2023-01-20     450  General Expenses
5                6  2023-01-25    1200       Conferences
6                7  2023-02-01     200     Subscriptions
7                8  2023-02-05      95         Utilities
8                9  2023-02-10     400  General Expenses
9               10  2023-02-15     700       Renovations
10              11  2023-02-20      30     IT Department
11              12  2023-02-25     500       Maintenance
In [ ]:
# 插入新数据到transactions表,该数据的account_id在accounts表中尚未维护
cur.executemany('''
INSERT INTO transactions (transaction_id, date, amount, description, account_id) VALUES (?, ?, ?, ?, ?)
''', [
    (13, '2023-03-01', 760.00, 'Development Fees', 109)
])

fun_1()
print('-------------')
fun_2()
    transaction_id        date  amount      account_name
0                1  2023-01-01      50  General Expenses
1                2  2023-01-05     300     IT Department
2                3  2023-01-10    1500       Maintenance
3                4  2023-01-15      75            Travel
4                5  2023-01-20     450  General Expenses
5                6  2023-01-25    1200       Conferences
6                7  2023-02-01     200     Subscriptions
7                8  2023-02-05      95         Utilities
8                9  2023-02-10     400  General Expenses
9               10  2023-02-15     700       Renovations
10              11  2023-02-20      30     IT Department
11              12  2023-02-25     500       Maintenance
-------------
    transaction_id        date  amount      account_name
0                1  2023-01-01      50  General Expenses
1                2  2023-01-05     300     IT Department
2                3  2023-01-10    1500       Maintenance
3                4  2023-01-15      75            Travel
4                5  2023-01-20     450  General Expenses
5                6  2023-01-25    1200       Conferences
6                7  2023-02-01     200     Subscriptions
7                8  2023-02-05      95         Utilities
8                9  2023-02-10     400  General Expenses
9               10  2023-02-15     700       Renovations
10              11  2023-02-20      30     IT Department
11              12  2023-02-25     500       Maintenance
12              13  2023-03-01     760              None
(2)按账户类型统计交易金额
In [ ]:
sql = '''
    SELECT a.account_type, SUM(t.amount) as total_amount
    FROM transactions t
    LEFT JOIN accounts a ON t.account_id = a.account_id
    GROUP BY a.account_type;
    '''

# 查询数据
cur.execute(sql)
rows = cur.fetchall()

# 使用pandas展示结果
df = pd.DataFrame(rows, columns=['account_type', 'total_amount'])
print(df)
  account_type  total_amount
0         None           760
1   Department           330
2        Event          1200
3      Expense          1070
4      Project           700
5      Service          2200

练习

1. 基础查询与数据聚合

背景:你是一名数据分析师,负责分析公司财务交易数据。你的任务是从transactions和accounts表中提取和聚合数据,以生成有用的财务报告。

任务:

编写SQL查询,提取所有交易记录,显示交易ID、日期、金额、描述和账户名称。
编写SQL查询,按账户类型计算每个月的总交易金额。
In [ ]:
# 提取所有交易记录
sql = '''
    SELECT 
        t.transaction_id, 
        t.date, 
        t.amount, 
        t.description, 
        a.account_name
    FROM 
        transactions t
    LEFT JOIN 
        accounts a 
    ON 
        t.account_id = a.account_id;

    '''

# 查询数据
cur.execute(sql)
rows = cur.fetchall()

# 使用pandas展示结果
df = pd.DataFrame(rows, columns=['transaction_id', 'date', 'amount', 'description', 'account_name'])
print(df)
    transaction_id        date  amount            description  \
0                1  2023-01-01      50        Office Supplies   
1                2  2023-01-05     300      Software Purchase   
2                3  2023-01-10    1500  Equipment Maintenance   
3                4  2023-01-15      75        Travel Expenses   
4                5  2023-01-20     450     Marketing Campaign   
5                6  2023-01-25    1200        Conference Fees   
6                7  2023-02-01     200   Subscription Service   
7                8  2023-02-05      95           Utility Bill   
8                9  2023-02-10     400     Equipment Purchase   
9               10  2023-02-15     700      Office Renovation   
10              11  2023-02-20      30             Stationery   
11              12  2023-02-25     500             Legal Fees   

        account_name  
0   General Expenses  
1      IT Department  
2        Maintenance  
3             Travel  
4   General Expenses  
5        Conferences  
6      Subscriptions  
7          Utilities  
8   General Expenses  
9        Renovations  
10     IT Department  
11       Maintenance  
In [ ]:
# 按账户类型计算每个月的总交易金额
sql = '''
    SELECT 
        strftime('%Y-%m', date) AS month, 
        a.account_type, 
        SUM(t.amount) AS total_amount
    FROM 
        transactions t
    LEFT JOIN 
        accounts a 
    ON 
        t.account_id = a.account_id
    GROUP BY 
        month, 
        a.account_type
    ORDER BY 
        month, 
        a.account_type;
    '''

# 查询数据
cur.execute(sql)
rows = cur.fetchall()

# 使用pandas展示结果
df = pd.DataFrame(rows, columns=['month', 'account_type', 'total_amount'])
print(df)
     month account_type  total_amount
0  2023-01   Department           300
1  2023-01        Event          1200
2  2023-01      Expense           575
3  2023-01      Service          1500
4  2023-02   Department            30
5  2023-02      Expense           495
6  2023-02      Project           700
7  2023-02      Service           700
2:使用CASE WHEN进行条件查询与连接

背景:你需要对公司不同类型的财务交易进行分类,并生成详细的财务报告。

任务:

编写SQL查询,将每笔交易根据金额大小分类,并显示交易ID、日期、金额、描述、账户名称和分类结果。
编写SQL查询,计算各账户类型在不同交易分类下的总金额。
In [ ]:
# 根据金额大小分类交易
sql = '''
    SELECT 
        t.transaction_id, 
        t.date, 
        t.amount, 
        t.description, 
        a.account_name,
        CASE 
            WHEN t.amount < 100 THEN 'Small'
            WHEN t.amount BETWEEN 100 AND 500 THEN 'Medium'
            ELSE 'Large'
        END AS amount_category
    FROM 
        transactions t
    LEFT JOIN 
        accounts a 
    ON 
        t.account_id = a.account_id;
    '''

# 查询数据
cur.execute(sql)
rows = cur.fetchall()

# 使用pandas展示结果
df = pd.DataFrame(rows, columns=['transaction_id', 'date', 'amount', 'description', 'account_name', 'amount_category'])
print(df)
    transaction_id        date  amount            description  \
0                1  2023-01-01      50        Office Supplies   
1                2  2023-01-05     300      Software Purchase   
2                3  2023-01-10    1500  Equipment Maintenance   
3                4  2023-01-15      75        Travel Expenses   
4                5  2023-01-20     450     Marketing Campaign   
5                6  2023-01-25    1200        Conference Fees   
6                7  2023-02-01     200   Subscription Service   
7                8  2023-02-05      95           Utility Bill   
8                9  2023-02-10     400     Equipment Purchase   
9               10  2023-02-15     700      Office Renovation   
10              11  2023-02-20      30             Stationery   
11              12  2023-02-25     500             Legal Fees   

        account_name amount_category  
0   General Expenses           Small  
1      IT Department          Medium  
2        Maintenance           Large  
3             Travel           Small  
4   General Expenses          Medium  
5        Conferences           Large  
6      Subscriptions          Medium  
7          Utilities           Small  
8   General Expenses          Medium  
9        Renovations           Large  
10     IT Department           Small  
11       Maintenance          Medium  
In [ ]:
# 计算各账户类型在不同交易分类下的总金额
sql = '''
    SELECT 
        a.account_type,
        CASE 
            WHEN t.amount < 100 THEN 'Small'
            WHEN t.amount BETWEEN 100 AND 500 THEN 'Medium'
            ELSE 'Large'
        END AS amount_category,
        SUM(t.amount) AS total_amount
    FROM 
        transactions t
    LEFT JOIN 
        accounts a 
    ON 
        t.account_id = a.account_id
    GROUP BY 
        a.account_type,
        amount_category
    ORDER BY 
        a.account_type,
        amount_category;
    '''

# 查询数据
cur.execute(sql)
rows = cur.fetchall()

# 使用pandas展示结果
df = pd.DataFrame(rows, columns=['account_type', 'amount_category', 'total_amount'])
print(df)
  account_type amount_category  total_amount
0   Department          Medium           300
1   Department           Small            30
2        Event           Large          1200
3      Expense          Medium           850
4      Expense           Small           220
5      Project           Large           700
6      Service           Large          1500
7      Service          Medium           700
In [ ]:
# 关闭连接
conn.close()
In [ ]: