Skip to main content

Fixing data type mismatches

T

This is a script for fixing data type mismatches based on PK and FK relations. These columns should match but often they don't.

WITH s AS (
    SELECT
        a.table_name,
        a.column_name,
        c.position,
        --
        CASE
            WHEN a.data_type = 'NUMBER' THEN
                a.data_type ||
                CASE WHEN a.data_precision IS NOT NULL THEN '(' || a.data_precision || DECODE(a.data_scale, 0, '', ', ' || a.data_scale) || ')' END
            WHEN a.data_type IN ('CHAR', 'VARCHAR', 'VARCHAR2') THEN
                a.data_type || '(' ||
                DECODE(a.char_used, 'C', a.char_length || ' CHAR', a.data_length) || ')'
            ELSE a.data_type
            END AS data_type,
        --
        n.constraint_type,
        c.constraint_name,
        n.r_constraint_name
    FROM user_tab_columns a
    JOIN user_tables t
        ON t.table_name         = a.table_name
    JOIN user_cons_columns c
        ON c.table_name         = a.table_name
        AND c.column_name       = a.column_name
    JOIN user_constraints n
        ON n.constraint_name    = c.constraint_name
        AND n.constraint_type   IN ('P', 'R')
    ORDER BY a.table_name, n.constraint_type, n.constraint_name, a.column_name
)
SELECT
    s.table_name    AS parent_table,
    s.column_name   AS parent_column,
    s.data_type     AS parent_type,
    b.table_name    AS foreign_table,
    b.column_name   AS foreign_column,
    b.data_type     AS foreign_type,
    --
    'ALTER TABLE ' || RPAD(b.table_name, 30) ||
        ' MODIFY ' || RPAD(b.column_name, 30) || ' ' || s.data_type || ';' AS fix
FROM s
JOIN s b ON b.r_constraint_name = s.constraint_name
    AND b.position = s.position
WHERE s.data_type != b.data_type
ORDER BY 1, 2, 4, 5;


This script suggests corrections for data type mismatches based on column names. It is not that reliable as script above because you can have same column names for different data types, so you have to read it carefuly.

WITH s AS (
    SELECT
        a.table_name,
        a.column_name,
        CASE
            WHEN a.data_type = 'NUMBER' THEN
                a.data_type ||
                CASE WHEN a.data_precision IS NOT NULL THEN '(' || a.data_precision || DECODE(a.data_scale, 0, '', ', ' || a.data_scale) || ')' END
            WHEN a.data_type IN ('CHAR', 'VARCHAR', 'VARCHAR2') THEN
                a.data_type || '(' ||
                DECODE(a.char_used, 'C', a.char_length || ' CHAR', a.data_length) || ')'
            ELSE a.data_type
        END AS data_type
    FROM user_tab_columns a
    JOIN user_tables t
        ON t.table_name     = a.table_name
    LEFT JOIN user_mviews m
        ON m.mview_name     = a.table_name
    WHERE t.table_name      NOT LIKE '%$' ESCAPE '\'    -- exceptions for DML err tables, audit tables...
        AND m.mview_name    IS NULL                     -- skip materialized views
),
r AS (
    SELECT r.*, MAX(r.count_) OVER (PARTITION BY r.column_name) AS max_count
    FROM (
        SELECT s.*, COUNT(s.data_type) OVER (PARTITION BY s.column_name, s.data_type) AS count_
        FROM s
        WHERE EXISTS (
            SELECT 1
            FROM s b
            WHERE s.column_name = b.column_name
                AND s.data_type != b.data_type
        )
    ) r    
)
SELECT DISTINCT
    r.table_name,
    r.column_name,
    r.data_type,
    --c.table_name,
    --
    'ALTER TABLE ' || RPAD(r.table_name, 30) ||
        ' MODIFY ' || RPAD(r.column_name, 30) || ' ' || c.data_type || ';' AS fix
FROM r
JOIN r c
    ON c.column_name    = r.column_name
    AND c.count_        = c.max_count
WHERE r.count_          < r.max_count
ORDER BY 2, 1;


Comments