Looking for an efficient implementation of the reverse of lzip or interleave in Tcl: Building sublists with every nth item

Question

This describes an interleave function that can lzip data:

% interleave {a b c} {1 2 3}
a 1 b 2 c 3

I am looking for the reverse operation. Also I would like to specify into how many sublists the input shall be split. For example:

% lnth {a 1 b 2 c 3}  1
{a 1 b 2 c 3}

% lnth {a 1 b 2 c 3}  2
{a b c} {1 2 3}

% lnth {a 1 b 2 c 3}  3
{a 2} {1 c} {b 3}

% lnth {a 1 b 2 c 3}  6
{a} {1} {b} {2} {c} {3}

For uneven splits, the missing elements shall be just omitted. If you feel like it you could provide a default argument to be filled in, but that's not required. Also I don't mind the exact quotation of the two corner cases where n==1 or n==[llength $L]. Thanks Hai Vu for pointing this out in your earlier answer.

It would be good to have some notion of complexity in time and memory.

I'm on Tcl8.4 (this cannot be changed).

Update

For these kind of benchmark question its always good to have a central summary. All tests ran on the same machine, on the (rather small) example list $L as shown below. It's all highly un-scientific.Good code comes from the answers below, errors are mine.

Test code:

#!/usr/bin/tclsh


proc build_list {len} {
    incr len
    while {[incr len -1]} {
        lappend res {}
    }
    set res
}



proc lnth3_prebuild_no_modulo {L n} {
    # Build empty 2D list to hold result
    set iterations [expr {int(ceil(double([llength $L]) / $n))}]
    set one [build_list $iterations]
    set res [list]
    set cnt [expr {$n+1}]
    while {[incr cnt -1]} {
        lappend res $one
    }

    # Fill in original/real values
    set iteration 0
    set subListNumber 0
    foreach item $L {
        lset res $subListNumber $iteration $item
        if {[incr subListNumber] == $n} {
            set subListNumber 0
            incr iteration
        }
    }
    set res
}


proc lnth3_no_modulo {L n} {
    # Create a list of variables: subList0, subList1, subList2, ...
    for {set subListNumber 0} {$subListNumber < $n} {incr subListNumber} {
        set subList$subListNumber {}
    }

    # Build the sub-lists    
    set subListNumber 0
    foreach item $L {
        lappend subList$subListNumber $item
        if {[incr subListNumber] == $n} {
            set subListNumber 0
        }
    }

    # Build the result from all the sub-lists    
    set result {}
    for {set subListNumber 0} {$subListNumber < $n} {incr subListNumber} {
        lappend result [set subList$subListNumber]
    }

    return $result
}


proc lnth {L n} {
    set listvars ""
    for {set cnt 0} {$cnt < $n} {incr cnt} {
        lappend listvars "L$cnt"
    }

    set iterations [expr {ceil(double([llength $L]) / $n)}]
    for {set cnt 0} {$cnt < $iterations} {incr cnt} {
        foreach listvar $listvars el [lrange $L [expr {$cnt*$n}] [expr {($cnt+1)*$n-1}] ] {
            lappend $listvar $el
        }
    }

    set res [list]
    foreach listvar $listvars {
        lappend res [eval "join \$$listvar"]
    }
    set res
}


proc lnth_prebuild {L n} {
    set iterations [expr {int(ceil(double([llength $L]) / $n))}]
    set one [build_list $iterations]

    set listvars ""
    for {set cnt 0} {$cnt < $n} {incr cnt} {
        lappend listvars L$cnt
        set L$cnt $one
    }

    for {set cnt 0} {$cnt < $iterations} {incr cnt} {
        foreach listvar $listvars el [lrange $L [expr {$cnt*$n}] [expr {($cnt+1)*$n-1}] ] {
            lset $listvar $cnt $el
        }
    }

    set res [list]
    foreach listvar $listvars {
        lappend res [eval "join \$$listvar"]
    }
    set res
}



proc lnth2 {L n} {
    set listLen [llength $L]
    set subListLen [expr {$listLen / $n}]
    if {$listLen % $n != 0} { incr subListLen }
    set result {}

    for {set iteration 0} {$iteration < $n} {incr iteration} {
        set subList {}
        for {set i $iteration} {$i < $listLen} {incr i $n} {
            lappend subList [lindex $L $i]
        }
        lappend result $subList
    }
    return $result
}


proc lnth3 {L n} {
    # Create a list of variables: subList0, subList1, subList2, ...
    for {set subListNumber 0} {$subListNumber < $n} {incr subListNumber} {
        set subList$subListNumber {}
    }

    # Build the sub-lists    
    set i 0
    foreach item $L {
        set subListNumber [expr {$i % $n}]
        lappend subList$subListNumber $item
        incr i
    }

    # Build the result from all the sub-lists    
    set result {}
    for {set subListNumber 0} {$subListNumber < $n} {incr subListNumber} {
        lappend result [set subList$subListNumber]
    }

    return $result
}



# stuff subcommands in a namespace
namespace eval ::unlzip {}

proc unlzip {L n} {
   # check if we have the proc already
   set name [format "::unlzip::arity%dunlzip" $n]
   if {[llength [info commands $name]]} {
      return [$name $L]
   } else {
      # create it
      proc $name {V} [::unlzip::createBody $n]
      return [$name $L]
   }
}

proc ::unlzip::createBody {n} {
   for {set i 0} {$i < $n} {incr i} {
       lappend names v$i
       lappend lnames lv$i
   }
   set lbody ""
   set ret {
   return [list }
   foreach lname $lnames name $names {
       append lbody [format {
       lappend %s $%s} $lname $name]
       append ret "\$$lname "
   }
   append ret {]}
   return [format {foreach {%s} $V { %s }
                   %s} $names $lbody $ret]
}




### Tests
set proc_reference lnth
set procs {lnth_prebuild lnth2 lnth3 unlzip lnth3_no_modulo lnth3_prebuild_no_modulo}
set L {a 1 b 2 c 3 d 4 e 5 f 6 g 7 h 8 j 9 i 10 k 11 l 12 m 13 n 14 o 15 p 16 q 17 r 18 s 19 t 20 u 21 v 22 w 23 x 24 y 25 z 26}
set Ns {1 2 3 4 5 6 7 8 9 10 13 26}

# Functional verification
foreach n $Ns {
    set expected [$proc_reference $L $n]
    foreach p $procs {
        set result [$p $L $n]
        if {$expected ne $result} {
            puts "Wrong result for proc $p, N=$n."
            puts "  Expected: $expected"
            puts "       Got: $result"
        }
    }
}

# Table header
puts -nonewline [format "%30s" {proc_name\N}]
foreach n $Ns {
    puts -nonewline [format "  %7d" $n]
}
puts ""

# Run benchmarks
foreach proc_name [concat $proc_reference $procs] {
    puts -nonewline [format "%30s" $proc_name]
    foreach n $Ns {
        puts -nonewline [format "  %7.2f" [lindex [time "$proc_name \$L $n" 10000] 0]]
    }
    puts ""
}

The results:

               proc_name\N        1        2        3        4        5        6        7        8        9       10       13       26
                      lnth    33.34    23.73    21.88    20.51    21.33    21.33    22.41    23.07    23.36    25.59    26.09    38.39
             lnth_prebuild    41.14    31.00    28.88    27.24    28.48    29.06    30.45    31.46    31.43    34.65    34.45    49.10
                     lnth2     8.56     8.08     8.35     8.78     9.12     9.29     9.66     9.98    10.29    10.61    11.22    14.94
                     lnth3    17.15    18.35    18.91    19.55    20.55    21.42    22.24    23.54    23.71    24.27    25.79    33.78
                    unlzip     5.36     5.25     5.03     4.97     5.27     5.42     5.52     5.43     5.42     5.96     5.51     6.83
           lnth3_no_modulo    14.88    16.56    17.20    17.97    18.63    19.42    19.78    20.74    21.53    21.84    23.60    31.29
  lnth3_prebuild_no_modulo    14.44    13.30    12.83    12.51    12.51    12.43    12.36    12.41    12.41    12.83    12.70    14.09

schlenk · Accepted Answer

One option would be creating specialized procs on the fly:

Not sure how fast it is for larger N or larger sets, but should be quite fast for repeated runs, as you have nearly no overhead than straight calls to foreach and lappend.

package require Tcl 8.4

# stuff subcommands in a namespace
namespace eval ::unlzip {}

proc unlzip {L n} {
   # check if we have the proc already
   set name [format "::unlzip::arity%dunlzip" $n]
   if {[llength [info commands $name]]} {
      return [$name $L]
   } else {
      # create it
      proc $name {V} [::unlzip::createBody $n]
      return [$name $L]
   }
}

proc ::unlzip::createBody {n} {
   for {set i 0} {$i < $n} {incr i} {
       lappend names v$i
       lappend lnames lv$i
   }
   set lbody ""
   set ret {
   return [list }
   foreach lname $lnames name $names {
       append lbody [format {
       lappend %s $%s} $lname $name]
       append ret "\$$lname "
   }
   append ret {]}
   return [format {foreach {%s} $V { %s }
                   %s} $names $lbody $ret]
}

proc ::unlzip::arity1unlzip {V} {
   return [list $V]
}

# example how the function looks for N=2

proc ::unlzip::arity2unlzip {V} {
   foreach {v1 v2} $V {
      lappend lv1 $v1
      lappend lv2 $v2
   }
   return [list $lv1 $lv2]
}

The disassambled bytecode for Tcl 8.6 for the N=3 proc would look like this (via Tcl 8.6. ::tcl::unsupported::disassemble proc:

ByteCode 0x00667988, refCt 1, epoch 5, interp 0x005E0B70 (epoch 5)
Source "foreach {v0 v1 v2} $V { 
	      lappend lv0 $v0
	      "
Cmds 6, src 149, inst 86, litObjs 1, aux 1, stkDepth 3, code/src 0.00
Proc 0x00694368, refCt 1, args 1, compiled locals 9
  slot 0, scalar, arg, "V"
  slot 1, scalar, temp
  slot 2, scalar, temp
  slot 3, scalar, "v0"
  slot 4, scalar, "v1"
  slot 5, scalar, "v2"
  slot 6, scalar, "lv0"
  slot 7, scalar, "lv1"
  slot 8, scalar, "lv2"
Exception ranges 1, depth 1:
  0: level 0, loop, pc 17-57, continue 10, break 61
Commands 6:
  1: pc 0-63, src 0-94        2: pc 17-30, src 32-46
  3: pc 31-44, src 55-69        4: pc 45-57, src 78-93
  5: pc 64-84, src 120-148        6: pc 73-83, src 128-147
Command 1: "foreach {v0 v1 v2} $V { 
	      lappend lv0 $v0
	      "
  (0) loadScalar1 %v0         # var "V"
  (2) storeScalar1 %v1        # temp var 1
  (4) pop
  (5) foreach_start4 0
            [data=[%v1], loop=%v2
             it%v1  [%v3, %v4, %v5]]
  (10) foreach_step4 0
            [data=[%v1], loop=%v2
             it%v1  [%v3, %v4, %v5]]
  (15) jumpFalse1 +46         # pc 61
Command 2: "lappend lv0 $v0"
  (17) startCommand +13 1     # next cmd at pc 30
  (26) loadScalar1 %v3        # var "v0"
  (28) lappendScalar1 %v6     # var "lv0"
  (30) pop
Command 3: "lappend lv1 $v1"
  (31) startCommand +13 1     # next cmd at pc 44
  (40) loadScalar1 %v4        # var "v1"
  (42) lappendScalar1 %v7     # var "lv1"
  (44) pop
Command 4: "lappend lv2 $v2 "
  (45) startCommand +13 1     # next cmd at pc 58
  (54) loadScalar1 %v5        # var "v2"
  (56) lappendScalar1 %v8     # var "lv2"
  (58) pop
  (59) jump1 -49      # pc 10
  (61) push1 0        # ""
  (63) pop
Command 5: "return [list $lv0 $lv1 $lv2 ]"
  (64) startCommand +21 2     # next cmd at pc 85, 2 cmds start here
Command 6: "list $lv0 $lv1 $lv2 "
  (73) loadScalar1 %v6        # var "lv0"
  (75) loadScalar1 %v7        # var "lv1"
  (77) loadScalar1 %v8        # var "lv2"
  (79) list 3
  (84) done
  (85) done

As straight forward as it gets..., well, if the lists are incomplete (llength $L modulo $n isn't zero) you would need some little extra checks. As long as the lists are balanced, you could als pre populate the lists and use lset instead of lappend, which is faster, as it doesn't reallocate the list array so often.

Hai Vu · Answer

Here is my approach: build one sub-list at a time, then append to the result before building the next one.

proc lnth2 {L n} {
    set listLen [llength $L]
    set subListLen [expr {$listLen / $n}]
    if {$listLen % $n != 0} { incr subListLen }
    set result {}

    for {set iteration 0} {$iteration < $n} {incr iteration} {
        set subList {}
        for {set i $iteration} {$i < $listLen} {incr i $n} {
            lappend subList [lindex $L $i]
        }
        lappend result $subList
    }
    return $result
}

Let say that L = {a 1 b 2 c 3} and n = 2, then I will build the first sub-list {a b c} by picking out the 0th, 2nd, and 4th items from the original list, append that to the result and move on the the second sub-list. Likewise, the second sub-list will be the 1th, 3rd, and 5th items.

Update

After reviewing my solution, I still don't like the fact that I have to use lindex. I imagine lindex has to travese the list in order to find the list item, and my solution placed lindex right inside a loop; which means we traverse the same list several times. The next attempt is to traverse the list only once. This time, I mimic your algorithm, but avoid using the list functions such as lrange.

proc lnth3 {L n} {
    # Create a list of variables: subList0, subList1, subList2, ...
    for {set subListNumber 0} {$subListNumber < $n} {incr subListNumber} {
        set subList$subListNumber {}
    }

    # Build the sub-lists    
    set i 0
    foreach item $L {
        set subListNumber [expr {$i % $n}]
        lappend subList$subListNumber $item
        incr i
    }

    # Build the result from all the sub-lists    
    set result {}
    for {set subListNumber 0} {$subListNumber < $n} {incr subListNumber} {
        lappend result [set subList$subListNumber]
    }

    return $result
}

Sadly, this attempt performs worse than my first attempt. I still don't understand why.

Looking for an efficient implementation of the reverse of lzip or interleave in Tcl: Building sublists with every nth item

Tags:

list

tcl

cfi

2 Answers

schlenk

Update

Hai Vu

Recent Activity

Donate For Us

Looking for an efficient implementation of the reverse of lzip or interleave in Tcl: Building sublists with every nth item

Tags:

list

tcl

cfi

2 Answers

schlenk

Update

Hai Vu

Related questions

Recent Activity

Donate For Us