#!/usr/bin/perl -w # # cvslog: merge cvs logs of multiple files into one # # Copyright (C) 2001 Tony Finch # # $dotat: scripts/cvslog,v 1.38 2002/08/12 10:26:49 fanf2 Exp $ # # Permission is granted to use, modify, and distribute this software and its # documentation, provided that the copyright and version information, this # permission notice, and the following disclaimer appear in all copies, and # that modified copies are clearly marked as such and are not endorsed or # promoted using the name of the author without written permission. # # This software and its documentation are provided "as is", without warranty # of any kind: in no event shall the author be held liable for any damages # arising from the use of this software or its documentation. use strict; use Getopt::Long; use IO::Pipe; sub log_entry ($$$$$$); sub splitrev ($); sub isbranch ($); sub tabulate (@); sub unixtime ($); sub debug ($); #### data structures etc. # A particular version of a particular file is called a "filerev" # which is the concatenation of the RCS filename and the version # number with a hyphen between them. # We keep lists of filerevs as the keys of a hash like this: # # $revlist = { $filerev => $lines, ... }; # A reference to the following structure represents a change set. # # $change = { # revs => $revlist, # time => $time, # date => $date, # author => $author, # message => $message, # }; # This is a list of all the changes. # my @changes; # We need to be able to look up the change for a given filerev. # This hash may contain more than one reference to the same change. # my %revchange; # We keep an index of changes keyed on the # commit message and author in order to combine # multi-file commits into one entry. # # $similar{"$author\n$message"} = [ $change, ... ]; # my %similar; # A single commit might not be completed within # one second, so we allow a fudge factor when # comparing the time stamps of commits that we # think might be the same. The fudge factor is # larger than you might expect because when doing # a single commit that spans multiple directories # to a repository on the local machine, cvs gives # you the opportunity to re-edit the commit # message for each directory and commits the # relevant files immediately after each edit. # my $time_fudge = 30; # Fake changes that represent the start and end of all # time. Note that perl produces an unsigned result for # the ~ operator so it will always be bigger than the # maximum time_t which is signed. # my $alpha = { time => -~0, date => "?" }; my $omega = { time => ~0, date => "?" }; # A reference to the following structure represents # a tag or a branch. The before and after entries # refer to the last commit before and the first # commit after the tag or branch was created. This # may be incorrect if tags of the same name have # been laid down on different files at different # times. What is worse, the same name may be used # for a tag in one file and a branch in another! # # $tag = { # revs => $revlist, # before => $change1, # after => $change2, # these two are only really used for debugging # name => $tagname, # type => "tag" or "branch", # }; # Tags with the same name in different files are # combined using this data structure. # # $tag{$tagname} = $tag; # my %tag; # We keep a list of the tags that apply to each filerev. # We enter branches here twice, once for the revision # just before the branch was created (for working out # when that happened), and once for the branch number # (which may be distinguished from a normal revision # number because it has an even number of dots). # # $revtags{$filerev} = [ $tagname, ... ]; # my %revtags; # regexps etc. for parsing the cvs log output # my $file_re = '^RCS file: (.*)$'; my $tag_re = '^\s+([^:]+):\s+([0-9.]+)$'; my $head_end = "description:\n"; # my $lineN = "=" x 77 . "\n"; my $line0 = "-" x 28 . "\n"; my $line1_re = '^revision\s+([0-9.]+)$'; my $line2_re = '^date:\s+([^;]*);\s+author:\s+([^;]*);\s+' . 'state:\s+([^;]*);(\s+lines:)?(.*)$'; my $line3_re = '^branches:(\s+[0-9.]+;)+$'; #### parse options my %option; Getopt::Long::Configure 'bundling'; GetOptions (\%option, "debug|d!", "noless|L!", "brieftags|n!", "notags|N!", "pipe|p!") or die "usage: cvslog [-d] [-L] [-n] [-N] [-p] [file ...]\n", "\t-d\tdebugging output\n", "\t-L\tdon't run pager\n", "\t-n\tshow less tag information\n", "\t-N\tdo not show any tags\n", "\t-p\tread input from pipe\n"; #### work out where to get the cvs log input from my $inpipe; if ($option{pipe}) { $inpipe = new_from_fd IO::Handle fileno(STDIN), "r"; die "$0: fdopen: $!\n" unless defined $inpipe; } else { $inpipe = new IO::Pipe; die "$0: pipe: $!\n" unless defined $inpipe; $inpipe->reader("cvs", "log", @ARGV); } #### work out where to send output to my $outpipe; if ($option{noless} or not -t STDOUT) { $outpipe = new_from_fd IO::Handle fileno(STDOUT), "w"; die "$0: fdopen: $!\n" unless defined $outpipe; } else { $outpipe = new IO::Pipe; die "$0: pipe: $!\n" unless defined $outpipe; $outpipe->writer($ENV{PAGER}); } #### slurp in the logs for (;;) { my $file; my $line; # read tag metadata etc. and find start of revision messages while($line = $inpipe->getline) { if ($line =~ /$file_re/o) { $file = $1; debug "RCS file $file"; next; } if ($line =~ /$tag_re/o and not $option{notags}) { $tag{$1}->{name} = $1; my $tag = $tag{$1}; my $filerev = "$file-$2"; # fix branch revision numbers $filerev =~ s/\.0(\.\d+)$/$1/; $tag->{revs}->{$filerev} = ""; push @{$revtags{$filerev}}, $tag->{name}; $tag->{before} ||= $alpha; $tag->{after} ||= $omega; if (isbranch $filerev) { $tag->{type} = "branch"; my $startrev = splitrev $filerev; push @{$revtags{$startrev}}, $tag->{name}; } else { $tag->{type} = "tag"; } debug "$tag->{type} $tag->{name} rev $filerev"; next; } if ($line eq $head_end) { debug "end of header"; last; } } last unless defined $line; # We have to be sure that we have all three lines of the header # for a revision entry before we stop accumulating the previous # commit's message. We process the log line by line, and keep # what we expect the next line to be in the state variable. my $state = "message"; my $next_rev; my $rev; my $date; my $author; my $lines; my $message; while($line = $inpipe->getline) { if ($state eq "line1") { if ($line =~ /$line1_re/o) { debug "line1 $line"; $next_rev = $1; $state = "line2"; next; } $message .= $line0; } if ($state eq "line2") { if ($line =~ /$line2_re/o) { debug "line2 $line"; # we now have the header for the next change # so save the previous change if there was one log_entry $file, $rev, $date, $author, $lines, $message if defined $rev; # and remember the header info of this change $rev = $next_rev; $date = $1; $author = $2; $lines = $5; $message = ""; $state = "line3"; next; } $message .= $line0 . "revision $next_rev\n"; } if ($state eq "line3") { if ($line =~ /$line3_re/o) { debug "line3 $line"; # ignore this optional header line $state = "message"; next; } } $state = "message"; if ($line eq $lineN) { debug "end of file"; last; } if ($line eq $line0) { debug "start of entry"; $state = "line1"; next; } $message .= $line; } log_entry $file, $rev, $date, $author, $lines, $message if defined $rev; } #### cvs has finished $inpipe->close; #### create fake change entries for the tags unless ($option{brieftags}) { for my $tag (values %tag) { my $date; # skip tags for which we didn't see the actual revisions next unless $tag->{before}->{time} > $alpha->{time}; $date = "$tag->{before}->{date} ... $tag->{after}->{date}"; push @changes, { revs => $tag->{revs}, time => $tag->{before}->{time}, date => $date, author => "", message => "created $tag->{type} $tag->{name}\n" }; } } #### sort and print the changes for my $change (sort {$a->{time} <=> $b->{time}} @changes) { $outpipe->print("$change->{date} $change->{author}\n\n"); for my $filerev (sort {$a cmp $b} keys %{$change->{revs}}) { my $lines = $change->{revs}->{$filerev}; $filerev =~ /^(.*)-([0-9.]*)$/; my ($file,$rev) = ($1,$2); $outpipe->print(" $file\n"); my @tags; my @bps; for my $tagname (sort @{$revtags{$filerev}}) { if (exists $tag{$tagname}->{revs}->{$filerev}) { push @tags, $tagname; } else { push @bps, $tagname; } } my $branchrev = splitrev $filerev; my @branches = sort @{$revtags{$branchrev} or []}; unshift @tags, " tags:" if @tags; unshift @bps, " branch points:" if @bps; unshift @branches, " branches:" if @branches; $outpipe->print(" $rev$lines@bps@branches@tags\n"); } $outpipe->print("\n", $change->{message}); $outpipe->print("-" x 32, "\n"); } #### that's all, folks $outpipe->close; exit; #### add a log entry sub log_entry ($$$$$$) { my $file = shift; my $rev = shift; my $date = shift; my $author = shift; my $lines = shift; my $message = shift; my $filerev = "$file-$rev"; my $time = unixtime $date; debug "log entry $file $rev $date $author $lines"; my $change; # if there's a similar change within the time fudge # period, merge this change with it. # my $key = "$author\n$message"; for my $similar (@{$similar{$key}}) { if ($time_fudge > abs($time - $similar->{time})) { $change = $similar; last; } } # otherwise this is a separate change # unless (defined $change) { $change = { time => $time, date => $date, author => $author, message => $message, }; push @changes, $change; push @{$similar{$key}}, $change; } $change->{revs}->{$filerev} = $lines; $revchange{$filerev} = $change; # log entries turn up in reverse chronological # order by branch, which allows us to take a # short-cut with the book keeping: if this # revision is tagged then we can update the # tag's before and after timestamps based on # this revision and its successor which we # know we have already seen if there is one. # # We try to avoid bogus time stamps by ensuring # that they are in the interval that we have # already established, but this has the problem # that if the first file we come across is bogus # then we will never get near the truth. # my ($nextrev,$x) = splitrev $filerev; $nextrev = $nextrev.".".($x+1); my $after = $revchange{$nextrev} || $omega; for my $tagname (@{$revtags{$filerev}}) { my $tag = $tag{$tagname}; if ($tag->{before}->{time} < $time and $time < $tag->{after}->{time}) { $tag->{before} = $change; debug "tag $tag->{name} after $date"; } if ($tag->{before}->{time} < $after->{time} and $after->{time} < $tag->{after}->{time}) { $tag->{after} = $after; debug "tag $tag->{name} before $after->{date}"; } } } #### revision number utils sub splitrev ($) { my $rev = shift; $rev =~ s/\.(\d+)$//; if (wantarray) { return ($rev,$1); } else { return $rev; } } sub isbranch ($) { my $rev = shift; $rev =~ s/^.*-([0-9.]+)$/$1/; $rev =~ s/[^.]//g; return (1 & length $rev) ? 0 : 1; } #### convert human-readable time to unix time sub unixtime ($) { my $string = shift; $string =~ /(\d\d\d\d).(\d\d).(\d\d).(\d\d).(\d\d).(\d\d)/; my $year = $1; my $month = $2; my $day = $3; my $hour = $4; my $minute = $5; my $second = $6; use integer; $year -= $month < 3; $month += $month < 3 ? 10 : -2; my $time = $year/400 - $year/100 + $year/4 + $year*365 + $month*367/12 + $day - 719499; $time = $time * 86400 + $hour * 3600 + $minute * 60 + $second; return $time; } #### sub debug ($) { my $message = shift; print STDERR "$message\n" if $option{debug}; } #### EOF