Lines Of Code Counting

LM 2006-02-09: I travel by car to work everyday. The journey takes three quarter of an hour about (traffic allowing). Especially on the way back, my mind uses his background activities to solve small problems.

My last evening journey small problem was: counting c lines of code in TCL way. I discarded C style solution to the problem (probably using a finite state machine) or console linux commands. Pure TCL code.

This is my straight off solution.

Comments and optimizations are appreciated.

What's about TCL LOC counting?

 proc c_loc {fname} {
         set loc 0
         set cflag 0
         # Open file assuming all will be right
         set fd [open $fname r]
         while {![eof $fd]} {
                 # read a code line
                 gets $fd line
                 
                 # cut-off comments that are totally enclosed into line 
                 set codeonly [regsub {//.*} $line ""]
                 set codeonly [regsub -all {/\*[^(\*/)]*\*/} $codeonly ""]
                 
                 # cut-off multi-line comments
                 if {[regexp {(/\*.*)} $codeonly]} {
                         set codeonly [regsub {(/\*.*)} $codeonly ""]
                         set cflag 1
                 }
                 if {[regexp {(.*\*/)} $codeonly]} {
                         set codeonly [regsub {(.*\*/)} $codeonly ""]
                         set cflag 0
                 }
                 
                 # count all ";" and "}" occurrences (line of code ending chars)
                 if {$cflag == 0} {
                         # For debugging uncomment the following line
                        # puts $codeonly
                         incr loc [regexp -all {[;\}]} $codeonly]
                 }
         }
         close $fd
         return $loc
 }

RS: Reading a file line-by-line is best done with the idiom

 while {[gets $fp line] >= 0} {
    ...
 }

because eof is only detected after the first failed gets (which returns -1 then).


See also: Counting comments in a source


male - 2006.02.09: I saw this page and Counting comments in a source and tried to combine both procedures into one procedure. This procedure sourceMetrics analysis the given source code or source file (C, C++ and tcl) and returns a list of values - count of lines, code lines, and comment lines, the coverage of the code and the comments in relation to the count of lines, the count of comment blocks (related single line comments too) and the line numbers of the comments.

 proc sourceMetrics {sourceFileName {sourceData -1}} {
         # detect the file format and configure this procedure with this
         #
         set supportedFormats        [list cpp cxx c tcl];

         switch -exact -- [set sourceFormat [string tolower [string trimleft [file extension $sourceFileName] "."]]] {
                 tcl {
                         set cStyle        0;

                         array set commentPattern [list \
                                 empty        {^\s*#+$} \
                                 line        {^\s*#+(\s*\S+)+} \
                                 inline        {;\s*#+(\s*\S+)+$} \
                         ];
                 }
                 c - cpp - cxx {
                         set cStyle        1;

                         array set commentPattern [list \
                                 empty        {^\s*(?:(?://)|(?:/\*\s*\*/))$} \
                                 line        {^\s*/{2,}(?:\s*\S+)+} \
                                 inline        {;\s*(?:(?:/{2,}(\s*\S+)+)|(?:/\*(?:\s*\S+)+\s*\*/))$} \
                                 start        {^\s*/\*(?:\s*\S+)*} \
                                 end        {^(\s*\S*)*\*/} \
                         ];
                 }
                 default {
                         error "not supported file format \"$sourceFormat\"";
                 }
         }

         # open the source file, if no source code is provided
         #
         if {$sourceData == -1} {
                 if {([file exists $sourceFileName] == 1) &&
                          ([file isfile $sourceFileName] == 1)} {
                         if {[catch {set sourceFile [open $sourceFileName r];} reason] == 1} {
                                 error $reason;
                         }

                         fconfigure $sourceFile -buffering full -buffersize 1000000;

                         set sourceData        [read $sourceFile [file size $sourceFileName]];

                         close $sourceFile;
                 }
         }

         # loop over the lines of source code
         #
         set comment                0;
         set cComment                0;
         set commentsCount        0;
         set commentStartLine        0;
         set commentLastLine        0;
         set commentIndices        [list];
         set commentLinesCount        0;
         set codeLinesCount        0;

         set sourceLines                [split $sourceData "\n"];
         set sourceLinesCount        [llength $sourceLines];

         for {set idx 0} {$idx < $sourceLinesCount} {incr idx} {
                 set lineNumber        [expr {$idx + 1}];
                 set line                        [lindex $sourceLines $idx];

                 if {$cComment == 0} {
                         # skip blank lines
                         # and continue with the next line if detected
                         #
                         # ends a comment block of single line comments
                         #
                         if {[string length $line] == 0} {
                                 if {$comment == 1} {
                                         lappend commentIndices [list $commentStartLine $commentLastLine];

                                         set comment        0;
                                 }

                                 continue;
                         }

                         # skip end of blocks, closing braces only
                         # and continue with the next line if detected
                         #
                         # ends a comment block of single line comments
                         #
                         if {[regexp {^\s*\}} $line] == 1} {
                                 if {$comment == 1} {
                                         lappend commentIndices [list $commentStartLine $commentLastLine];

                                         set comment        0;
                                 }

                                 continue;
                         }

                         # detect comment lines with no words
                         # and continue with the next line if detected
                         #
                         if {[regexp $commentPattern(empty) $line] == 1} {
                                 incr commentLinesCount;

                                 if {$comment == 0} {
                                         set cComment                0;
                                         set comment                1;
                                         set commentStartLine        $lineNumber;

                                         incr commentsCount;
                                 }

                                 set commentLastLine        $lineNumber;

                                 continue;
                         }
                 }

                 # if the C style is requested, than test for C style comment block
                 # starts and ends
                 #
                 if {$cStyle == 1} {
                         if {$cComment == 0} {
                                 # detect a C comment block start
                                 # and continue with the next line if detected
                                 #
                                 if {[regexp $commentPattern(start) $line] == 1} {
                                         incr commentLinesCount;

                                         if {$comment == 0} {
                                                 set cComment                1;
                                                 set comment                1;
                                                 set commentStartLine        $lineNumber;

                                                 incr commentsCount;
                                         }

                                         set commentLastLine        $lineNumber;

                                         continue;
                                 }
                         } else {
                                 # detect a C comment block end
                                 # and continue with the next line
                                 #
                                 if {[regexp $commentPattern(end) $line] == 1} {
                                         incr commentLinesCount;

                                         set cComment                0;
                                         set comment                0;
                                         set commentLastLine        $lineNumber;

                                         lappend commentIndices [list $commentStartLine $commentLastLine];
                                 }

                                 continue;
                         }
                 }

                 # detect a single comment line
                 # and continue with the next line if detected
                 #
                 if {[regexp $commentPattern(line) $line] == 1} {
                         incr commentLinesCount;

                         if {$comment == 0} {
                                 set cComment                0;
                                 set comment                        1;
                                 set commentStartLine        $lineNumber;

                                 incr commentsCount;
                         }

                         set commentLastLine        $lineNumber;

                         continue;
                 }

                 # detect a comment behind a command
                 #
                 if {[regexp $commentPattern(inline) $line] == 1} {
                         incr commentLinesCount;
                 }

                 # detect lines with lines continuation sign to count them as one line
                 #
                 while {[string match {* \\} $line] == 1} {
                         incr idx;

                         set line        [lindex $sourceLines $idx];
                 }

                 # now it could only be a code line left
                 #
                 # ends a comment block of single line comments
                 #
                 if {$comment == 1} {
                         set comment        0;

                         lappend commentIndices [list $commentStartLine $commentLastLine];
                 }

                 incr codeLinesCount;
         }

         # return the metrix
         #
         return [list \
                 lines                $sourceLinesCount \
                 codelines        $codeLinesCount \
                 commentlines        $commentLinesCount \
                 coverage        [list \
                         code        [expr {$codeLinesCount == 0 ? 0 : $codeLinesCount / double($sourceLinesCount) * 100}] \
                         comment        [expr {$commentLinesCount == 0 ? 0 : $commentLinesCount / double($sourceLinesCount) * 100}] \
                 ] \
                 comments        $commentsCount \
                 commentindices        $commentIndices \
         ];
 }

LV The non-tcl program slcl [L1 ] says on its home page that it counts Tcl lines of source.


loccount by ESR is a faster implementation (in Go) of David A. Wheeler's sloccount tool. It handles a wider spread of languages, as well (including Tcl).