2 ########################################################################
3 # Copyright (c) 2000, 2001 by Donald Sharp <sharpd@cisco.com>
6 # Permission is granted to copy and/or distribute this file, with or
7 # without modifications, provided this notice is preserved.
9 ########################################################################
13 Script to check the integrity of the Repository
21 This script will search through a repository and determine if
22 any of the files in it are corrupted.
24 Please do not run this script inside of the repository itself,
25 it will cause it to fail.
27 Also it currently can only be run over the entire repository,
28 so only point your CVSROOT at the actual CVSROOT.
36 setenv CVSROOT /release/111/cvs
38 # To see more verbose output
49 ######################################################################
51 ######################################################################
59 ######################################################################
61 ######################################################################
# Repository-relative names of files found to be corrupted.  Entries are
# pushed while files are examined and printed in the final report.
63 my @list_of_broken_files;
# Running total of the revisions that were selected as "interesting";
# increased by find_interesting_revisions and printed in the final report.
68 my $total_interesting_revisions;
72 ######################################################################
74 ######################################################################
76 ######################################################################
82 # To search the repository for broken files
88 # $ENV{ CVSROOT } - The CVS repository to search through
89 # $ENV{ CVSDEBUGEDIT } - Turn on Debugging.
90 # @list_of_broken_files - The list of files that need to
92 # $verbose - is verbose mode on?
93 # $total_revisions - The number of revisions considered
94 # $total_interesting_revisions - The number of revisions used
95 # $total_files - The total number of files looked at.
98 # A list of broken files
101 # Do not run this script inside the repository. Choose
102 # a nice safe spot( like /tmp ) outside of the repository.
104 ######################################################################
# Main section: validate the environment, walk the repository with
# File::Find, then print the lists of corrupted/extra files and the totals.
# NOTE(review): brace-only and other short lines are not visible in this
# excerpt, so the block structure below is incomplete as shown.
# Repository root that is scanned; starts out as $ENV{CVSROOT}.
105 my $directory_to_look_at;
107 select (STDOUT); $| = 1; # make unbuffered
# Reset the run-wide counters before scanning.
109 $total_revisions = 0;
110 $total_interesting_revisions = 0;
# CVSROOT is mandatory: it names the repository to scan.
113 if( !exists( $ENV{ CVSROOT } ) )
115 die( "The script should be run with the CVSROOT environment variable set" );
# Only the existence of CVSDEBUGEDIT is tested; its value is not examined.
118 if( exists( $ENV{ CVSDEBUGEDIT } ) )
121 print( "Verbose Mode Turned On\n" );
124 $directory_to_look_at = $ENV{ CVSROOT };
# Follow symbolic links until the path is no longer a link.
# NOTE(review): readlink can return a relative target, which the next -l
# test would resolve against the current directory -- confirm this is
# acceptable for the repositories this is run on.
126 while( -l $directory_to_look_at )
128 $directory_to_look_at = readlink( $directory_to_look_at );
# NOTE(review): the condition guarding this die is not visible here.
132 die( "Encountered too many symlinks for $ENV{ CVSROOT }\n" );
136 print( "Processing: $directory_to_look_at\n" ) if( $verbose );
# Load the ignore patterns, then visit every entry below the root;
# process_file is the per-entry callback.
137 @ignore_files = &get_ignore_files_from_cvsroot( $directory_to_look_at );
138 find( \&process_file, $directory_to_look_at );
# Report: corrupted files first (heading only when there are any).
140 my $num_files = @list_of_broken_files;
141 print( "List of corrupted files\n" ) if( $num_files > 0 );
142 foreach my $broken ( @list_of_broken_files )
144 print( "**** File: $broken\n" );
# Then the files that were collected in @extra_files.
147 $num_files = @extra_files;
148 print( "List of Files That Don't belong in Repository:\n" ) if( $num_files > 0 );
149 foreach my $extra ( @extra_files )
151 print( "**** File: $extra\n" );
# Finally the overall totals.
153 print( "Total Files: $total_files\n" );
154 print( "Total Revisions: $total_revisions Interesting Revisions: $total_interesting_revisions\n" );
156 ######################################################################
162 # This function is called by the find function; its purpose
163 # is to decide if it is important to look at a file or not.
164 # We only care about files that have the ,v at the end.
170 # $ENV{ CVSROOT } - The CVS repository to search through
178 ######################################################################
# Body of the File::Find callback (passed to find() as \&process_file).
# NOTE(review): the sub header, braces and some short statements are not
# visible in this excerpt.
# Full path of the entry currently being visited by find().
181 my $path = $File::Find::name;
# Make the path relative to the repository root.
# NOTE(review): the root is interpolated into the pattern unescaped, so any
# regex metacharacters in the CVSROOT path would be interpreted -- consider
# \Q...\E.
184 $path =~ s/^$directory_to_look_at\///;
186 print( "\tProcessing File: $path\n" ) if( $verbose );
# NOTE(review): the condition selecting this branch is not visible; the
# header comment states that only files ending in ",v" are examined.
190 look_at_cvs_file( $path );
# Any other entry that is not a directory is tested against the ignore
# patterns, each of which is used as a regular expression.
192 elsif( ! -d $File::Find::name )
196 foreach my $ignore ( @ignore_files )
198 if( $path =~ /$ignore/ )
# NOTE(review): presumably reached only when no ignore pattern matched --
# the guarding lines are not visible here.
207 push( @extra_files, $path );
212 ######################################################################
218 # To decide if a file is broken or not. The algorithm is:
219 # a) Get the revision history for the file.
220 # - If that fails the file is broken, save the fact
221 # and continue processing other files.
222 # - If that succeeds we have a list of revisions.
223 # b) For Each revision try to retrieve that version
224 # - If that fails the file is broken, save the fact
225 # and continue processing other files.
229 # $file - The file to look at.
238 # We have to handle Attic files in a special manner.
239 # Basically remove the Attic from the string if it
240 # exists at the end of the $path variable.
242 ######################################################################
# Body of look_at_cvs_file: decide whether one repository file is broken.
# NOTE(review): the sub header, the unpacking of $file, braces and any
# early returns are not visible in this excerpt.
# Split the path into file name and directory.  No suffix patterns are
# passed to fileparse, so $suffix is not meaningful here.
246 my( $name, $path, $suffix ) = fileparse( $file );
# Attic handling: when the directory ends in "Attic/", strip that component
# and rebuild $file from the remaining directory plus the file name.
248 if( $path =~ s/Attic\/$// )
250 $file = $path . $name;
# The history lookup is given the bare file name, not the full path.
253 my $revisions = get_history( $name );
# undef from get_history means the log step failed: record the file.
255 if( !defined( $revisions ) )
257 print( "\t$file is corrupted, this was determined via a cvs log command\n" ) if( $verbose );
258 push( @list_of_broken_files, $file );
# Reduce the full history to the revisions worth checking.
262 my @int_revisions = find_interesting_revisions( @$revisions );
# Check each interesting revision; a failed check records the file.
264 foreach my $revision ( @int_revisions )
266 print( "\t\tLooking at Revision: $revision\n" ) if( $verbose );
267 if( !check_revision( $file, $revision ) )
269 print( "\t$file is corrupted in revision: $revision\n" ) if( $verbose );
270 push( @list_of_broken_files, $file );
277 ######################################################################
283 # To retrieve an array of revision numbers.
286 # $file - The file to retrieve the revision numbers for
292 # On Success - Reference to the list of revision numbers
293 # On Failure - undef.
296 # The $_ is saved off because the File::Find functionality
297 # expects the $_ to not have been changed.
298 # The -N option for the rlog command means do not spit out
299 # tags or branch names.
301 ######################################################################
# Body of get_history: collect the revision numbers reported by rlog.
# NOTE(review): the sub header, the read loop and the handling of $ignore
# are not visible in this excerpt.
# Backslash-escape ", $, ` and \ so the name can sit inside the
# double-quoted shell argument built below.
305 $file =~ s/(["\$`\\])/\\$1/g;
# Run rlog through the shell and read its output; stderr is folded into the
# same stream.  Failure to start the command is fatal.
311 open( FILE, "rlog -N \"$file\" 2>&1 |" ) or die( "unable to run rlog, help" );
315 #rlog outputs a "----" line before the actual revision
316 #without this we'll pick up peoples comments if they
317 #happen to start with revision
318 if( /^----------------------------$/ )
# A line of the form "revision <number>" contributes <number>, but only
# while $ignore is false.
324 if( ( !$ignore ) && ( ( $revision ) = m/^revision (\S+)/ ) )
326 push( @revisions, $revision );
# The collected revision numbers are handed back by reference.
338 return( \@revisions );
341 ######################################################################
347 # Given a file and a revision number ensure that we can
348 # check out that file
351 # $file - The file to look at.
352 # $revision - The revision to look at.
358 # If we can get the File - 1
359 # If we can not get the File - 0
362 # cvs command line options are as follows:
363 # -n - Do not run any checkout program as specified by the -o
364 # option in the modules file
365 # -p - Put all output to standard out.
366 # -r - The revision of the file that we would like to look at.
367 # Please note that cvs will return 0 for being able to successfully
368 # read the file and 1 for failure to read the file.
370 ######################################################################
# Body of check_revision: try to check out one revision of one file.
# NOTE(review): the sub header, braces and any statements around the
# system() call (including the failure return) are not visible here.
373 my( $file, $revision ) = @_;
# Backslash-escape ", $, ` and \ so the name can sit inside the
# double-quoted shell argument below.
# NOTE(review): $revision is interpolated into the command unquoted.
374 $file =~ s/(["\$`\\])/\\$1/g;
# Run the checkout with all output discarded; only the status matters.
# The status returned by system() is masked to its low 16 bits.
379 my $ret_code = 0xffff & system( "cvs co -n -p -r $revision \"$file\" > /dev/null 2>&1" );
# A zero status means the revision could be retrieved.
382 return( 1 ) if ( $ret_code == 0 );
388 ######################################################################
391 # find_interesting_revisions
394 # CVS stores information in a logical manner. We only really
395 # need to look at some interesting revisions. These are:
397 # And the last version on every branch.
398 # This is because cvs stores changes descending from
399 # main line. ie suppose the last version on mainline is 1.6
400 # version 1.6 of the file is stored in toto. version 1.5
401 # is stored as a diff between 1.5 and 1.6. 1.4 is stored
402 # as a diff between 1.5 and 1.4.
403 # branches are stored a little differently. They are
404 # stored in ascending order. Suppose there is a branch
405 # on 1.4 of the file. The first branches revision number
406 # would be 1.4.1.1. This is stored as a diff between
407 # version 1.4 and 1.4.1.1. The 1.4.1.2 version is stored
408 # as a diff between 1.4.1.1 and 1.4.1.2. Therefore
409 # we are only interested in the earliest revision number
410 # and the highest revision number on a branch.
413 # @revisions - The list of revisions to find interesting ones
419 # @new_revisions - The list of revisions that we find interesting
423 ######################################################################
# Reduce a file's full revision list to the revisions worth checking out.
#
#   @revisions - every revision number of the file, e.g. "1.6", "1.4.1.2"
#
# Returns the list of interesting revisions ("<branch>.<revision>").
# Side effects: adds to $total_revisions and $total_interesting_revisions
# and, in verbose mode, prints the per-file counts.
sub find_interesting_revisions
{
    my( @revisions ) = @_;

    my @new_revisions;

    # branch number => the revision on that branch chosen so far
    my %branch_revision;

  START_OVER:
    foreach my $revision( @revisions )
    {
        my( $branch_number, $branch_rev ) = branch_split( $revision );

        # A branch number with a single element is the mainline, which has
        # its own selection rules and may ask us to skip this revision.
        if( elements_in_branch( $branch_number ) == 1 )
        {
            my $start_over;
            ( $start_over,
              %branch_revision ) = find_int_mainline_revision( $branch_number,
                                                               $branch_rev,
                                                               %branch_revision );
            next START_OVER if( $start_over );
        }

        %branch_revision = find_int_branch_revision( $branch_number,
                                                     $branch_rev,
                                                     %branch_revision );
    }

    # Drop entries that are already covered by a branch growing from them.
    %branch_revision = remove_duplicate_branches( %branch_revision );

    # Turn each branch/revision pair back into a full revision number.
    while( my( $key, $value ) = each ( %branch_revision ) )
    {
        push( @new_revisions, $key . "." . $value );
    }

    my $rc  = @revisions;
    my $nrc = @new_revisions;

    $total_revisions += $rc;
    $total_interesting_revisions += $nrc;

    print( "\t\tTotal Revisions: $rc Interesting Revisions: $nrc\n" ) if( $verbose );

    return( @new_revisions );
}
478 ########################################################################
481 # remove_duplicate_branches
484 # To remove from the list of branches that we are interested
485 # in duplication that will cause cvs to check a revision multiple
486 # times. For Instance revision 1.1.1.1 should be preferred
487 # to be checked over revision 1.1, as that v1.1.1.1 can
488 # only be retrieved by going through v1.1. Therefore
489 # we should remove v1.1 from the list of branches that
493 # %branch_revisions - The hash of the interesting revisions
499 # %branch_revisions - The hash of the modified interesting revisions
504 ########################################################################
# Remove entries whose revision is the root of another listed branch.
#
#   %branch_revisions - branch number => interesting revision on it
#
# An entry "<key> => <value>" is dropped when some other branch number in
# the hash is exactly "<key>.<value>.<n>": retrieving anything on that
# branch already goes through revision <key>.<value>, so checking it
# separately would be redundant.
#
# Returns the reduced hash.
sub remove_duplicate_branches
{
    my( %branch_revisions ) = @_;

    # Work from a snapshot of the branch numbers: every entry is compared
    # against all branches that were passed in, even ones removed earlier
    # in this pass.
    my @keys = keys( %branch_revisions );

    foreach my $key ( @keys )
    {
        my $branch_comp  = $key . "." . $branch_revisions{ $key };
        my $comp_elements = elements_in_branch( $branch_comp );

        foreach my $branch ( @keys )
        {
            next if( $branch eq $key );

            # Only a branch exactly one element longer can grow from
            # this revision.
            next if( $comp_elements != elements_in_branch( $branch ) - 1 );

            # Quote the revision so its dots are literal, and require the
            # separating dot so that "1.1" does not match "1.10.2".
            if( $branch =~ /^\Q$branch_comp\E\./ )
            {
                delete( $branch_revisions{ $key } );
                last;
            }
        }
    }

    return( %branch_revisions );
}
542 ######################################################################
545 # find_int_branch_revision
548 # To Find an interesting branch revision.
550 # If the $branch_revision exists in the interesting branch
551 # hash and the new $branch_rev is greater than the currently saved
552 # one replace it with the new $branch_rev.
553 # else if the $branch_revision doesn't exist in the interesting
554 # branch hash, then just store the $branch_number and $branch_rev
557 # $branch_number - The branch that we are looking at
558 # $branch_rev - The particular revision we are looking
559 # at on the $branch_number.
560 # %branch_revision - The hash storing the interesting branches
561 # and the revisions on them.
567 # %branch_revision - The modified hash that stores interesting
573 ######################################################################
# Record the highest revision seen so far on a branch.
#
#   $branch_number   - the branch being looked at, e.g. "1.4.1"
#   $branch_rev      - the revision on that branch, e.g. 2
#   %branch_revision - branch number => highest revision recorded so far
#
# Returns the updated hash: the branch is added when it is new, and its
# stored revision is raised when $branch_rev is numerically greater.
sub find_int_branch_revision
{
    my( $branch_number, $branch_rev, %branch_revision ) = @_;

    # A new branch is always stored; a known one only moves upwards.
    if( !exists( $branch_revision{ $branch_number } )
        || $branch_rev > $branch_revision{ $branch_number } )
    {
        $branch_revision{ $branch_number } = $branch_rev;
    }

    return( %branch_revision );
}
593 ######################################################################
596 # find_int_mainline_revision
599 # To Find an interesting mainline revision.
601 # if the $branch_number is less than a branch number
602 # with one element in it, then delete the old branch_number
604 # if the $branch_number is greater than a branch number
605 # then return, and tell the calling function that we
606 # should skip this element, as that it's not important.
607 # if the $branch_number is the same as a branch number
608 # with one element in it, then check to see if the
609 # $branch_rev is less than the stored branch rev if
610 # it is replace with new $branch_rev. Else ignore revision
613 # $branch_number - The branch that we are looking at
614 # $branch_rev - The particular revision we are looking
615 # at on the $branch_number.
616 # %branch_revision - The hash storing the interesting branches
617 # and the revisions on them.
623 # ( $skip, %branch_revision ) -
624 # $skip - 1 if we need to ignore this particular $branch_number
625 # $branch_rev combo. Else 0.
626 # %branch_revision - The modified hash that stores interesting
632 ######################################################################
# Decide whether a mainline revision is interesting.
#
#   $branch_number   - the mainline branch being looked at (one element)
#   $branch_rev      - the revision on that branch
#   %branch_revision - branch number => revision recorded so far
#
# Returns ( $skip, %branch_revision ):
#   $skip            - 1 when the caller should ignore this
#                      $branch_number/$branch_rev pair, otherwise 0
#   %branch_revision - the possibly modified hash
#
# Only entries whose branch number has a single element are compared.
sub find_int_mainline_revision
{
    my( $branch_number, $branch_rev, %branch_revision ) = @_;

    # keys() yields a snapshot list, so deleting inside the loop is safe.
    foreach my $key ( keys %branch_revision )
    {
        next if( elements_in_branch( $key ) != 1 );

        # A lower mainline number supersedes the stored one.
        if( $branch_number < $key )
        {
            delete( $branch_revision{ $key } );
            next;
        }

        # A higher mainline number than one already stored is not needed.
        if( $branch_number > $key )
        {
            return( 1, %branch_revision );
        }

        # Same mainline number: keep the lowest revision seen on it.
        if( ( exists( $branch_revision{ $branch_number } ) ) &&
            ( $branch_rev < $branch_revision{ $branch_number } ) )
        {
            $branch_revision{ $branch_number } = $branch_rev;
            return( 1, %branch_revision );
        }
    }

    return( 0, %branch_revision );
}
663 ######################################################################
669 # Determine the number of elements in a revision number
670 # Elements are defined by numbers separated by ".".
671 # the revision 1.2.3.4 would have 4 elements
672 # the revision 1.2.4.5.6.7 would have 6 elements
675 # $branch - The revision to look at.
681 # $count - The number of elements
686 ######################################################################
# Count the dot-separated elements of a revision or branch number.
#
#   $branch - the revision to look at, e.g. "1.2.3.4"
#
# Returns the number of elements ("1.2.3.4" gives 4, "1" gives 1).  An
# empty string gives 0 because split returns an empty list for it.
sub elements_in_branch
{
    my( $branch ) = @_;

    # A lexical array keeps the sub strict-clean and free of shared state.
    my @split_rev = split /\./, $branch;

    my $count = @split_rev;
    return( $count );
}
698 ######################################################################
704 # To split up a revision number up into the branch part and
705 # the number part. For Instance:
706 # 1.1.1.1 - is split 1.1.1 and 1
707 # 2.1 - is split 2 and 1
708 # 1.3.4.5.7.8 - is split 1.3.4.5.7 and 8
711 # $revision - The revision to look at.
717 # ( $branch, $revision ) -
718 # $branch - The branch part of the revision number
719 # $revision - The revision part of the revision number
724 ######################################################################
# Split a revision number into its branch part and its revision part.
#
#   $revision - the revision to look at, e.g. "1.3.4.5.7.8"
#
# Returns ( $branch, $revision ):
#   "1.1.1.1"     gives ( "1.1.1", "1" )
#   "2.1"         gives ( "2", "1" )
#   "1.3.4.5.7.8" gives ( "1.3.4.5.7", "8" )
# A single element comes back as the branch with an undefined revision,
# and an empty string gives two undefined values.
sub branch_split
{
    my( $revision ) = @_;

    my @split_rev = split /\./, $revision;

    # The last element is the revision, provided something is left over
    # to act as the branch.
    my $branch_rev = ( @split_rev > 1 ) ? pop( @split_rev ) : undef;

    # Whatever remains, re-joined, is the branch number.
    my $branch = @split_rev ? join( ".", @split_rev ) : undef;

    return( $branch, $branch_rev );
}
746 ######################################################################
749 # get_ignore_files_from_cvsroot
752 # Retrieve the list of files from the CVSROOT/ directory
753 # that should be ignored.
754 # These are the regular files (e.g., commitinfo, loginfo)
755 # and those specified in the checkoutlist file.
764 # @ignore - the list of files to ignore
769 ######################################################################
770 sub get_ignore_files_from_cvsroot {
772 my @ignore = ( 'CVS\/fileattr$',
774 '^CVSROOT\/.#loginfo',
776 '^CVSROOT\/.#rcsinfo',
777 '^CVSROOT\/editinfo',
778 '^CVSROOT\/.#editinfo',
779 '^CVSROOT\/verifymsg',
780 '^CVSROOT\/.#verifymsg',
781 '^CVSROOT\/commitinfo',
782 '^CVSROOT\/.#commitinfo',
784 '^CVSROOT\/.#taginfo',
785 '^CVSROOT\/cvsignore',
786 '^CVSROOT\/.#cvsignore',
787 '^CVSROOT\/checkoutlist',
788 '^CVSROOT\/.#checkoutlist',
789 '^CVSROOT\/cvswrappers',
790 '^CVSROOT\/.#cvswrappers',
792 '^CVSROOT\/.#notify',
794 '^CVSROOT\/.#modules',
796 '^CVSROOT\/.#readers',
798 '^CVSROOT\/.#writers',
801 '^CVSROOT\/.#config',
802 '^CVSROOT\/val-tags',
803 '^CVSROOT\/.#val-tags',
804 '^CVSROOT\/history' );
805 my $checkoutlist_file = "$cvsroot\/CVSROOT\/checkoutlist";
806 open( CHECKOUTLIST, "<$cvsroot\/CVSROOT\/checkoutlist" )
807 or die( "Unable to read checkoutlist file: $!\n" );
809 my @list = <CHECKOUTLIST>;
811 close( CHECKOUTLIST )
812 or die( "Unable to close checkoutlist file: $!\n" );
814 foreach my $line( @list )
816 next if( $line =~ /^#/ || $line =~ /^$/ );
817 if( $line =~ /^\s*(\S*)\s*/ ) { $line = $1 };
818 push( @ignore, "^CVSROOT\/$line", "^CVSROOT\/\.#$line" );