oracle database expert performance tuning buffer pools keep recycle sqlplus query TOAD system trigger plsql programming
Edward Stoever is your
DATABASE-EXPERT
Computer solutions for businesses of all sizes!
(818) 528-4111
 Home  Résumé  Articles  Portfolio  Contact  YouTube 
-- UPDATE, JULY 2006 --
Here is a reliable way to use the analytical function ROW_NUMBER() 
to delete duplcates with one sql statement, very fast! 
/* ©2006 by Edward Stoever,  */

DELETE FROM goremal
      WHERE ROWID IN (
--
               SELECT ROWID
                 FROM (SELECT   ROWID,
                                ROW_NUMBER () OVER
             (PARTITION BY goremal_pidm ORDER BY goremal_preferred_ind DESC,
                                goremal_activity_date DESC) AS rownumber,
                                goremal_pidm, goremal_emal_code,
                                goremal_email_address, goremal_status_ind,
                                goremal_preferred_ind, goremal_activity_date
                           FROM goremal
                          WHERE    goremal_email_address
                                || goremal_pidm
                                || goremal_emal_code IN (
                                   SELECT      goremal_email_address
                                            || goremal_pidm
                                            || goremal_emal_code
                                       FROM goremal
                                   GROUP BY goremal_pidm,
                                            goremal_emal_code,
                                            goremal_email_address
                                     HAVING COUNT (*) > 1)
                       ORDER BY goremal_pidm,
                                goremal_preferred_ind DESC,
                                goremal_activity_date DESC)
                WHERE rownumber <> 1
--
            );
			
-- In the above deletion, you can figure out what it is doing by working from the
-- inner-most query out. The first query finds duplicates where the new primary 
-- key, based on three columns, are concatenated together. Then, it just looks 
-- throught the table and finds those values again from all rows, orders them 
-- based upon criteria most desirable for keeping such as an indicator, a date. 
-- It deletes the duplicates that are not most desirable, and if there are any 
-- exact duplcates, it only deletes one of them. Works great and is easier to use
-- and understand than the PL/SQL below!



-- ORACLE - DELETE DUPLICATES EASILY
-- I recently did some work for a private client whose database was designed in a 
-- very sloppy manner. The database had no primary keys or foreign keys at all!
-- I needed a way to delete duplicate rows so that I could apply primary keys 
-- in all the necessary places. Most queries that look for duplcate rows are 
-- extremely slow. One query that can find such rows is:
/*
    SELECT   col_1, col_2, COUNT (*)
          FROM table_name
      GROUP BY col_1, col_2
        HAVING COUNT (*) > 1;
*/
-- Note: this table would have a primary key on the combination of col_1 and col_2

-- This query is fast. The poblem with it is that it returns BOTH rows that 
-- duplicate each other and we only want to delete ONE of the rows in order to apply our 
-- primary key. The following anonymous block transforms the above query into a 
-- delete statement for one of the rows, and it is still very fast!


DECLARE -- Code ©2004 by Edward Stoever
   CURSOR c_get_duplicates
   IS
      SELECT   user_login, user_password, COUNT (*)
          FROM sp.sp_users
      GROUP BY user_login, user_password
        HAVING COUNT (*) > 1;

   var_get_duplicates   c_get_duplicates%ROWTYPE;

   CURSOR c_del_only_one
   IS
      SELECT ROWID
        FROM sp.sp_users
       WHERE user_login = var_get_duplicates.user_login
         AND user_password = var_get_duplicates.user_password;

   var_del_only_one     ROWID;
BEGIN
   OPEN c_get_duplicates;

   LOOP
      FETCH c_get_duplicates
       INTO var_get_duplicates;

      EXIT WHEN c_get_duplicates%NOTFOUND;

      OPEN c_del_only_one;

      FETCH c_del_only_one
       INTO var_del_only_one;

      DELETE FROM sp.sp_users
            WHERE ROWID = var_del_only_one;

      COMMIT;

      CLOSE c_del_only_one;
   END LOOP;

   CLOSE c_get_duplicates;
END;
/

--NOTE - This script needs a tweak to handle values that might be NULL.
--Here is an example of how I ran it recently on values that were NULL.

DECLARE     -- Code ©2004 by Edward Stoever
   CURSOR c_get_duplicates
   IS
      SELECT   ssrfees_term_code, ssrfees_crn, ssrfees_detl_code,
               ssrfees_ftyp_code, ssrfees_levl_code, COUNT (*)
          FROM ssrfees
        HAVING COUNT (*) > 1
      GROUP BY ssrfees_term_code,
               ssrfees_crn,
               ssrfees_detl_code,
               ssrfees_ftyp_code,
               ssrfees_levl_code;

   var_get_duplicates   c_get_duplicates%ROWTYPE;

   CURSOR c_del_only_one
   IS
      SELECT ROWID
        FROM ssrfees
       WHERE ssrfees_term_code = var_get_duplicates.ssrfees_term_code
         AND ssrfees_crn = var_get_duplicates.ssrfees_crn
         AND ssrfees_detl_code = var_get_duplicates.ssrfees_detl_code
         AND NVL(ssrfees_ftyp_code,'1') = NVL(var_get_duplicates.ssrfees_ftyp_code,'1')
         AND NVL(ssrfees_levl_code,'1') = NVL(var_get_duplicates.ssrfees_levl_code,'1');

   var_del_only_one     ROWID;
BEGIN
   OPEN c_get_duplicates;

   LOOP
      FETCH c_get_duplicates
       INTO var_get_duplicates;

      EXIT WHEN c_get_duplicates%NOTFOUND;

      OPEN c_del_only_one;

      FETCH c_del_only_one
       INTO var_del_only_one;

      DELETE FROM ssrfees
            WHERE ROWID = var_del_only_one;

      COMMIT;

      CLOSE c_del_only_one;
   END LOOP;

   CLOSE c_get_duplicates;
END;
/


©2017 Edward Stoever