images2pdf is a command-line utility that uses pdf4tcl to create a PDF document from images and other PDF documents, without modifying the image data.
PYK 2016-03-10: To verify that the included images have not been modified, extract them again with a utility like pdfimages -all that just extracts images without processing them. The extracted images should be identical, bit-for-bit, to the original images.
Each image is output as a separate page, and the width and height of the page are the width and height of the image. To constrain one or the other, use the height or width options. The image width/height ratio is always preserved. The constraint only reflects how the image is dynamically resized for presentation, not how the image is stored in the PDF file -- which for png and jpg files is always as a bit-for-bit identical copy of the original image. Currently, only jpeg and png files are supported.
Some PDF readers don't provide controls to zoom to a size smaller than the dimensions of the page that contains the image, but they do provide controls to magnify the image, so a default width of 800 is a good choice to ensure that the initial image fits into a reasonable display width, while still making it possible to zoom in for greater detail.
Utilities that might be used in conjunction with this script include tiff2pdf and jpegtran for lossless transformations of jpeg images.
#! /bin/env tclsh

package require fileutil::magic::filetype
package require pdf4tcl

# main --
#
#   Build a PDF from image files and existing PDF files.
#
#   Each image input is written to its own single-page temporary PDF whose
#   page size equals the (possibly reduced) image dimensions.  PDF inputs
#   are passed through as-is.  When more than one document results, they
#   are concatenated with pdf4tcl::catPdf and the temporary files removed.
#
# Arguments:
#   argv0   Script name (currently unused).
#   argv    Flat list of "key value" pairs.  Recognised keys:
#             infile F     add one input file
#             infiles F... add F and every remaining argument as input files
#             outfile F    name of the resulting PDF (must not exist yet)
#             outprefix P  prefix for generated file names (P-<n>.pdf)
#             height N     maximum page height (-1 disables; default -1)
#             width N      maximum page width  (-1 disables; default 800)
#
# Results:
#   Returns nothing.  Raises an error for an unknown option, a missing
#   input list, an unrecognised file type, or an already existing output
#   file.
proc main {argv0 argv} {
    set dims {}
    set ftypes {}
    set infiles {}
    set maxheight -1
    set maxwidth 800

    # Consume the argument list two words at a time.
    while {[llength $argv]} {
        set argv [lassign $argv[set argv {}] key val]
        switch -- $key {
            outfile {
                set outfile $val
            }
            height {
                set maxheight $val
            }
            infile {
                lappend infiles $val
            }
            infiles {
                # Everything that follows is treated as an input file.
                lappend infiles $val {*}$argv[set argv {}]
            }
            outprefix {
                set outprefix $val
            }
            width {
                # Fixed: the original stored the literal string "val".
                set maxwidth $val
            }
            default {
                return -code error [list {unknown option} $key]
            }
        }
    }

    if {![llength $infiles]} {
        return -code error [list {no input files given}]
    }

    # Fail before any temporary file is written when the requested output
    # already exists, so nothing is left behind.
    if {[info exists outfile] && [file exists $outfile]} {
        return -code error [list {file already exists} $outfile]
    }

    if {[info exists outprefix]} {
        # NOTE(review): the counter computed here is not used afterwards;
        # generated names are <outprefix>-<idx>.pdf with idx starting at 0.
        while {[file exists $outprefix-[incr outi]]} {}
    } else {
        # Pick the shortest run of zeros (0, 00, 000, ...) that is not a
        # prefix of any existing file name in the current directory.
        while {[llength [glob -nocomplain [set outprefix [
            string repeat 0 [incr outi]]]*]]} {}
    }

    # First pass: classify every input and work out the page size for
    # each image.
    pdf4tcl::new mypdf
    foreach infile $infiles {
        set ftype [fileutil::magic::filetype $infile]
        if {[string match {JPEG *} $ftype]} {
            set ftype jpeg
        } elseif {[string match {PNG *} $ftype]} {
            set ftype png
        } elseif {[string match {TIFF *} $ftype]} {
            set ftype tiff
        } elseif {[string match {PDF *} $ftype]} {
            set ftype pdf
        } else {
            mypdf destroy
            return -code error [list {unknown file type} $ftype \
                {for file} $infile]
        }
        lappend ftypes $ftype
        switch -- $ftype {
            jpeg - png - tiff {
                # The image is added only to query its dimensions.
                set id [mypdf addImage $infile -type $ftype]
                set width [mypdf getImageWidth $id]
                set height [mypdf getImageHeight $id]
                puts stderr [list infile $infile type $ftype \
                    height $height width $width]
                # Halve both dimensions until every active limit is met.
                while {
                    ($maxwidth > -1 && $width > $maxwidth)
                    || ($maxheight > -1 && $height > $maxheight)
                } {
                    set height [expr {$height / 2}]
                    set width [expr {$width / 2}]
                }
                lappend dims [list $width $height]
            }
            default {
                # PDF inputs need no page size of their own.
                lappend dims {}
            }
        }
    }
    mypdf destroy

    # Second pass: write one single-page PDF per image and rebuild the
    # input list so that it names only PDF files.
    set tmpfiles {}
    set idx -1
    foreach infile $infiles[set infiles {}] dim $dims ftype $ftypes {
        switch -- $ftype {
            jpeg - png - tiff {
                pdf4tcl::new mypdf -paper $dim
                set id [mypdf addImage $infile -type $ftype]
                mypdf putImage $id 0 0 \
                    -width [lindex $dim 0] -height [lindex $dim 1]
                set fname $outprefix-[incr idx].pdf
                mypdf write -file $fname
                lappend tmpfiles $fname
                lappend infiles $fname
                mypdf destroy
            }
            pdf {
                lappend infiles $infile
            }
            default {
                return -code error [list {unknown file type} $ftype \
                    {for file} $infile]
            }
        }
    }

    if {[llength $infiles] > 1} {
        if {![info exists outfile]} {
            set outfile $outprefix-[incr idx].pdf
        }
        if {[file exists $outfile]} {
            return -code error [list {file already exists} $outfile]
        }
        pdf4tcl::catPdf {*}$infiles $outfile
        file delete -- {*}$tmpfiles
    } elseif {[info exists outfile]} {
        if {[file exists $outfile]} {
            return -code error [list {file already exists} $outfile]
        }
        set only [lindex $infiles 0]
        if {$only in $tmpfiles} {
            # A page generated above: simply give it its final name.
            file rename -- $only $outfile
        } else {
            # A PDF supplied by the user: copy it so that the original
            # input is left where it was.
            file copy -- $only $outfile
        }
    }
    return
}

main $argv0 $argv