Revamp gzip header decoding, process header CRC16

This enables optional checking of FHCRC CRC16 header checksum in gzip
stream processing, if present and check-checksum is enabled.
This commit is contained in:
2023-05-21 15:39:39 +02:00
parent 8ad4bc425c
commit 3b98f3f476

View File

@ -630,78 +630,91 @@ return values (or nil if any given field is not present). Checks the
header for magic values and correct flags settings and signals a header for magic values and correct flags settings and signals a
gzip-decompression-error in case of incorrect or unsupported magic gzip-decompression-error in case of incorrect or unsupported magic
values or flags." values or flags."
(let ((id1 (read-byte input-stream)) (let ((crc32 +crc-32-start-value+))
(id2 (read-byte input-stream)) (labels ((crc-read-buffer (len)
(compression-method (read-byte input-stream)) (let ((buffer (make-array len :element-type '(unsigned-byte 8))))
(flags (read-byte input-stream))) (unless (= (read-sequence buffer input-stream) len)
(unless (and (= id1 +gzip-header-id1+) (= id2 +gzip-header-id2+)) (error 'gzip-decompression-error
(error 'gzip-decompression-error :format-control "Unexpected End Of File during GZIP Header parsing!"
:format-control :format-arguments nil))
"Header missing magic values ~2,'0X,~2,'0X (got ~2,'0X,~2,'0X instead)!" (setq crc32 (update-crc32-checksum crc32 buffer len))
:format-arguments (list +gzip-header-id1+ +gzip-header-id2+ id1 id2))) buffer))
(unless (= compression-method 8) (crc-read-byte ()
(error 'gzip-decompression-error (let ((buffer (crc-read-buffer 1)))
:format-control "Unknown compression-method in Header ~2,'0X!" (aref buffer 0)))
:format-arguments (list compression-method))) (crc-read-word ()
(unless (zerop (ldb (byte 3 5) flags)) (let ((buffer (crc-read-buffer 2)))
(error 'gzip-decompression-error (+ (aref buffer 0)
:format-control "Unknown flags in Header ~2,'0X!" (* (aref buffer 1) 256))))
:format-arguments (list flags))) (crc-read-dword ()
(values compression-method (let ((buffer (crc-read-buffer 4)))
;; FTEXT (+ (aref buffer 0)
(= 1 (ldb (byte 1 0) flags)) (* (aref buffer 1) 256)
;; MTIME (* (aref buffer 2) 256 256)
(parse-gzip-mtime input-stream) (* (aref buffer 3) 256 256 256))))
;; XFLAGS (crc-read-string ()
(read-byte input-stream) (with-output-to-string (string)
;; OS (loop with buffer = (make-array 1 :element-type '(unsigned-byte 8))
(read-byte input-stream) for value = (read-byte input-stream)
;; FEXTRA do
(unless (zerop (ldb (byte 1 2) flags)) (setf (aref buffer 0) value)
(parse-gzip-extra input-stream)) (setq crc32 (update-crc32-checksum crc32 buffer 1))
;; FNAME (if (zerop value)
(unless (zerop (ldb (byte 1 3) flags)) (return t)
(parse-gzip-string input-stream)) (write-char (code-char value) string))))))
;; FCOMMENT (let ((id1 (crc-read-byte))
(unless (zerop (ldb (byte 1 4) flags)) (id2 (crc-read-byte))
(parse-gzip-string input-stream)) (compression-method (crc-read-byte))
;; CRC16 (flags (crc-read-byte)))
(unless (zerop (ldb (byte 1 1) flags)) (unless (and (= id1 +gzip-header-id1+) (= id2 +gzip-header-id2+))
(+ (read-byte input-stream) (error 'gzip-decompression-error
(* (read-byte input-stream 256))))))) :format-control
"Header missing magic values ~2,'0X,~2,'0X (got ~2,'0X,~2,'0X instead)!"
(defun parse-gzip-mtime (input-stream) :format-arguments (list +gzip-header-id1+ +gzip-header-id2+ id1 id2)))
(let ((time (+ (read-byte input-stream) (unless (= compression-method 8)
(* (read-byte input-stream) 256) (error 'gzip-decompression-error
(* (read-byte input-stream) 256 256) :format-control "Unknown compression-method in Header ~2,'0X!"
(* (read-byte input-stream) 256 256 256)))) :format-arguments (list compression-method)))
(if (zerop time) (unless (zerop (ldb (byte 3 5) flags))
nil (error 'gzip-decompression-error
(+ time 2208988800)))) :format-control "Unknown flags in Header ~2,'0X!"
:format-arguments (list flags)))
(defun parse-gzip-extra (input-stream) (values compression-method
(let* ((length (+ (read-byte input-stream) (* (read-byte input-stream) 256))) ;; FTEXT
(result (make-array length :element-type '(unsigned-byte 8)))) (= 1 (ldb (byte 1 0) flags))
(read-sequence result input-stream) ;; MTIME
result)) (let ((time (crc-read-dword)))
(unless (zerop time)
(defun parse-gzip-string (input-stream) (+ time 2208988800)))
(with-output-to-string (string) ;; XFLAGS
(loop for value = (read-byte input-stream) (crc-read-byte)
until (zerop value) ;; OS
do (write-char (code-char value) string)))) (crc-read-byte)
;; FEXTRA
(defun parse-gzip-checksum (input-stream) (unless (zerop (ldb (byte 1 2) flags))
(+ (read-byte input-stream) (crc-read-buffer (crc-read-word)))
(* (read-byte input-stream) 256) ;; FNAME
(* (read-byte input-stream) 256 256) (unless (zerop (ldb (byte 1 3) flags))
(* (read-byte input-stream) 256 256 256))) (crc-read-string))
;; FCOMMENT
(unless (zerop (ldb (byte 1 4) flags))
(crc-read-string))
;; CRC16
(unless (zerop (ldb (byte 1 1) flags))
(+ (read-byte input-stream)
(* (read-byte input-stream) 256)))
;; Calculated CRC16
(ldb (byte 16 0) crc32))))))
(defun parse-gzip-footer (input-stream) (defun parse-gzip-footer (input-stream)
"Parse the GZIP-style footer as per RFC 1952 from the input-stream and "Parse the GZIP-style footer as per RFC 1952 from the input-stream and
return the CRC-32 checksum and ISIZE fields contained in the footer as return the CRC-32 checksum and ISIZE fields contained in the footer as
its return values." its return values."
(values (parse-gzip-checksum input-stream) (values ;; CRC-32
(+ (read-byte input-stream)
(* (read-byte input-stream) 256)
(* (read-byte input-stream) 256 256)
(* (read-byte input-stream) 256 256 256))
;; ISIZE ;; ISIZE
(+ (read-byte input-stream) (+ (read-byte input-stream)
(* (read-byte input-stream) 256) (* (read-byte input-stream) 256)
@ -777,13 +790,17 @@ expanded data matches the CRC-32 checksum, unless the check-checksum
keyword argument is set to true, in which case the checksum is checked keyword argument is set to true, in which case the checksum is checked
internally and a gzip-decompression-error is signalled if they don't internally and a gzip-decompression-error is signalled if they don't
match." match."
(multiple-value-bind (cm ftext mtime xfl os fextra fname fcomment) (multiple-value-bind (cm ftext mtime xfl os fextra fname fcomment crc16-old crc16-new)
(parse-gzip-header input-stream) (parse-gzip-header input-stream)
(declare (ignore ftext xfl os fextra)) (declare (ignore ftext xfl os fextra))
(unless (= cm 8) (unless (= cm 8)
(error 'gzip-decompression-error (error 'gzip-decompression-error
:format-control "Unknown compression method ~D!" :format-control "Unknown compression method ~D!"
:format-arguments (list cm))) :format-arguments (list cm)))
(when (and check-checksum crc16-old (/= crc16-old crc16-new))
(error 'gzip-decompression-error
:format-control "CRC-16 Checksum mismatch in header: ~4,'0X != ~4,'0X!"
:format-arguments (list crc16-old crc16-new)))
(let ((checksum-new (inflate-stream input-stream output-stream (let ((checksum-new (inflate-stream input-stream output-stream
:checksum (when check-checksum :crc-32))) :checksum (when check-checksum :crc-32)))
(checksum-old (parse-gzip-footer input-stream))) (checksum-old (parse-gzip-footer input-stream)))
@ -793,4 +810,4 @@ match."
:format-control :format-control
"Checksum mismatch for decompressed stream: ~8,'0X != ~8,'0X!" "Checksum mismatch for decompressed stream: ~8,'0X != ~8,'0X!"
:format-arguments (list checksum-old checksum-new))) :format-arguments (list checksum-old checksum-new)))
(values checksum-old fname mtime fcomment)))) (values checksum-old fname mtime fcomment crc16-old crc16-new))))