Search notes:

DBMS_STATS.CREATE_EXTENDED_STATS

drop   table tq84_tab;
For the demonstration, we need a table:
create table tq84_tab (
   id  integer       not null,
   val number(5,1)   not null,
   txt varchar2(10)  not null
);
The table is filled with some random data. In every third row, the value of txt ends in XXX:
begin
   dbms_random.seed(42);
end;
/

insert into tq84_tab
select
   level,
   dbms_random.value(1, 9999),
   dbms_random.string('u', 7) ||
      case mod(level, 3)
         when 0 then 'XXX'
         else         dbms_random.string('u', 3)
      end
from
   dual connect by level <= 10000;
Gathering the statistics for the table
begin
   dbms_stats.gather_table_stats(user, 'tq84_tab');
end;
/
The statistics are not sufficient to estimate the number of rows with substr(txt, -3) = 'XXX' correctly:
explain plan for
select count(*)
from
   tq84_tab
where
   substr(txt, -3) = 'XXX'
;
The optimizer thinks it's 100:
select * from table(dbms_xplan.display(format=>'basic +rows'));
--
-- -----------------------------------------------
-- | Id  | Operation          | Name     | Rows  |
-- -----------------------------------------------
-- |   0 | SELECT STATEMENT   |          |     1 |
-- |   1 |  SORT AGGREGATE    |          |     1 |
-- |   2 |   TABLE ACCESS FULL| TQ84_TAB |   100 |
-- -----------------------------------------------
In order to assist the optimizer with better statistics, we create extended statistics.
select
   dbms_stats.create_extended_stats(
      user,
     'tq84_tab',
     '(substr(txt, -3))'  -- Extensions must be put between parantheses!
   )  extended_stat_name
from
   dual;
The extended statistics show up in user_stat_extensions:
select
   extension,
   extension_name,
   creator,
   droppable
from
   user_stat_extensions
where
   table_name = 'TQ84_TAB';
The values with which the extended statistics are calculated are stored in a virtual and hidden column:
select
   col.column_name,
-- col.qualified_col_name,
   col.data_default,
   col.num_distinct,
   col.hidden_column,
   col.virtual_column,
   col.sample_size
from
   user_tab_cols col
where
   col.table_name = 'TQ84_TAB';
dbms_stats.create_extended_stats did not gather statistics for the new virtual column:
select
   col.column_name,
   col.last_analyzed
from
   user_tab_col_statistics col
where
   col.table_name = 'TQ84_TAB';
Therefore, the estimation for the select statement is still wrong:
explain plan for
select count(*)
from
   tq84_tab
where
   substr(txt, -3) = 'XXX'
;

select * from table(dbms_xplan.display(format=>'basic +rows'));
--
-- -----------------------------------------------
-- | Id  | Operation          | Name     | Rows  |
-- -----------------------------------------------
-- |   0 | SELECT STATEMENT   |          |     1 |
-- |   1 |  SORT AGGREGATE    |          |     1 |
-- |   2 |   TABLE ACCESS FULL| TQ84_TAB |   100 |
-- -----------------------------------------------
So, we gather the statistics again for the table:
begin
   dbms_stats.gather_table_stats(
      user,
     'tq84_tab'
   );
end;
/
We try again to show the plan for the statement:
explain plan for
select count(*)
from
   tq84_tab
where
   substr(txt, -3) = 'XXX'
;
This time, the estimation is correct: 3333 rows:
select * from table(dbms_xplan.display(format=>'basic +rows'));
--
-- -----------------------------------------------
-- | Id  | Operation          | Name     | Rows  |
-- -----------------------------------------------
-- |   0 | SELECT STATEMENT   |          |     1 |
-- |   1 |  SORT AGGREGATE    |          |     1 |
-- |   2 |   TABLE ACCESS FULL| TQ84_TAB |  3333 |
-- -----------------------------------------------
Let's also try txt like %XXX:
explain plan for
select count(*)
from
   tq84_tab
where
   txt like '%XXX';
It turns out, the optimizer (at least in 19c) still cannot correctly determine the number of rows:
select * from table(dbms_xplan.display(format=>'basic +rows'));
--
-- -----------------------------------------------
-- | Id  | Operation          | Name     | Rows  |
-- -----------------------------------------------
-- |   0 | SELECT STATEMENT   |          |     1 |
-- |   1 |  SORT AGGREGATE    |          |     1 |
-- |   2 |   TABLE ACCESS FULL| TQ84_TAB |   500 |
-- -----------------------------------------------

See also

dbms_stats

Index