diff options
Diffstat (limited to 'ebus-racket/3rdparty/bzlib/base/bytes.ss')
-rw-r--r-- | ebus-racket/3rdparty/bzlib/base/bytes.ss | 206 |
1 files changed, 206 insertions, 0 deletions
diff --git a/ebus-racket/3rdparty/bzlib/base/bytes.ss b/ebus-racket/3rdparty/bzlib/base/bytes.ss new file mode 100644 index 0000000..0edab66 --- /dev/null +++ b/ebus-racket/3rdparty/bzlib/base/bytes.ss @@ -0,0 +1,206 @@ +#lang scheme/base +;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; BASE.plt - common routines that are shared by all other bzlib modules +;; +;; in a way, base.plt is the most fundamental module of the whole bzlib stack +;; and as such it also is the lowest level code. We are not likely to +;; fix the code any time soon, and hence any of the functions here are +;; explicitly likely to be obsoleted or moved elsewhere. +;; +;; Proceed with caution. +;; +;; +;; Bonzai Lab, LLC. All rights reserved. +;; +;; Licensed under LGPL. +;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; bytes.ss - provides utility functions that works with bytes +;; yc 10/19/2009 - first version +;; yc 10/23/2009 - add read-bytes-avail that'll return the currently available bytes +;; yc 10/24/2009 - add read-byte-list & read-byte-list/timeout +;; yc 1/18/2010 - fix the issue that call-with-output-bytes was not available until v > 4.2 +;; yc 2/5/2010 - added string-char-ratios for accurately determien the ratio of ascii/latin-1/unicode chars +(require scheme/port scheme/contract "version-case.ss" "base.ss") + +;; call-with-output-bytes is not available until 4.1 +(define *call-with-output-bytes + (+:version>= "4.2" + call-with-output-bytes + (lambda (proc) + (let ((out (open-output-bytes))) + (dynamic-wind void + (lambda () + (proc out)) + (lambda () + (get-output-bytes out))))))) + +(define (port->bytes/charset in charset-in charset-out) + (*call-with-output-bytes + (lambda (out) + (convert-stream charset-in in charset-out out)))) + +(define (bytes->bytes/charset bytes charset-in charset-out) + (port->bytes/charset (open-input-bytes bytes) charset-in charset-out)) + +(define (bytes/charset->bytes/utf-8 bytes charset) + (bytes->bytes/charset bytes charset "utf-8")) + +(define (bytes/utf-8->bytes/charset bytes charset) + (bytes->bytes/charset bytes "utf-8" charset)) + +;; there are more to handle (specifically charsets). +(define (bytes/charset->string bytes charset) + (bytes->string/utf-8 (bytes/charset->bytes/utf-8 bytes charset))) + +(define (string->bytes/charset string charset) + (bytes/utf-8->bytes/charset (string->bytes/utf-8 string) charset)) + +(define (char-latin-1? c) + (< 0 (char->integer c) 256)) + +(define (char-ascii? c) + (< 0 (char->integer c) 128)) + +(define (string-char-or? s test?) + (define (helper len i) + (if (= len i) #f + (if (test? (string-ref s i)) #t + (helper len (add1 i))))) + (helper (string-length s) 0)) + +(define (string-char-and? s test?) + (define (helper len i) + (if (= len i) #t + (if (not (test? (string-ref s i))) #f + (helper len (add1 i))))) + (helper (string-length s) 0)) + +(define (char-type c) + (let ((i (char->integer c))) + (cond ((< i 128) 'ascii) + ((< i 256) 'latin-1) + (else 'unicode)))) + +(define (string-char-ratios s) + (define (helper ascii latin-1 unicode i len) + (if (= i len) + (values (/ ascii len) + (/ latin-1 len) + (/ unicode len)) + (case (char-type (string-ref s i)) + ((ascii) (helper (add1 ascii) latin-1 unicode (add1 i) len)) + ((latin-1) (helper ascii (add1 latin-1) unicode (add1 i) len)) + (else (helper ascii latin-1 (add1 unicode) (add1 i) len))))) + (if (= (string-length s) 0) + (values 1 0 0) + (helper 0 0 0 0 (string-length s)))) + +(define (string-type s) + (define (helper len i prev) + (if (= len i) prev + (let ((type (char-type (string-ref s i)))) + (case type + ((unicode) type) + ((latin-1) + (helper len (add1 i) (case prev + ((ascii) type) + (else prev)))) + (else (helper len (add1 i) prev)))))) + (helper (string-length s) 0 'ascii)) + +(define (string-latin-1? s) + (string-char-and? s char-latin-1?)) + +(define (string-ascii? s) + (string-char-and? s char-ascii?)) + +(define (char->bytes c) + (string->bytes/utf-8 (string c))) + +(define (split-string-by-bytes-count str num) + (define (maker chars) + (list->string (reverse chars))) + (define (helper str i chars blen acc) + (if (= i (string-length str)) ;; we are done here!!!... + (reverse (if (null? chars) acc + (cons (maker chars) acc))) + (let* ((c (string-ref str i)) + (count (char-utf-8-length c))) + (if (> (+ count blen) num) ;; we are done with this version.... + (if (= blen 0) ;; this means the character itself is greater than the count. + (helper str (add1 i) '() 0 (cons (maker (cons c chars)) acc)) + (helper str i '() 0 (cons (maker chars) acc))) + (helper str (add1 i) (cons c chars) (+ count blen) acc))))) + (helper str 0 '() 0 '())) + +(define (read-bytes-avail num in) + (define (helper bytes) + (let ((len (read-bytes-avail!* bytes in 0 num))) + (cond ((eof-object? len) bytes) + ((number? len) (subbytes bytes 0 len)) + (else ;; this is a *special* value... I don't know what to do with it yet... + (len))))) + (helper (make-bytes num 0))) + +(define (read-byte-list num in) + (define (helper bytes) + (if (eof-object? bytes) + bytes + (bytes->list bytes))) + (helper (read-bytes num in))) + +(define (read-byte-list/timeout num in (timeout #f)) + (define (helper alarm acc count) + (let ((evt (sync alarm in))) + (if (eq? alarm evt) + (reverse acc) + (let ((b (read-byte in))) + (cond ((eof-object? b) + (if (null? acc) + b + (reverse acc))) + ((= (add1 count) num) + (reverse (cons b acc))) + (else + (helper alarm (cons b acc) (add1 count)))))))) + (helper (alarm-evt (+ (current-inexact-milliseconds) (* 1000 (if (not timeout) + +inf.0 + timeout)))) '() 0)) + +(define (read-bytes/timeout num in (timeout #f)) + (define (helper bytes) + (if (eof-object? bytes) + bytes + (list->bytes bytes))) + (helper (read-byte-list/timeout num in timeout))) + +(define (positive-number? n) + (and (number? n) (> n 0))) + +(provide/contract + (char-ascii? (typeof/c char?)) + (char-latin-1? (typeof/c char?)) + (string-char-or? (-> string? (-> char? any) any)) + (string-char-and? (-> string? (-> char? any) any)) + (string-latin-1? (typeof/c string?)) + (string-ascii? (typeof/c string?)) + (char-type (typeof/c char?)) + (string-char-ratios (-> string? (values number? number? number?))) + (string-type (typeof/c string?)) + (split-string-by-bytes-count (-> string? exact-positive-integer? (listof string?))) + (port->bytes/charset (-> input-port? string? string? any)) + (bytes->bytes/charset (-> bytes? string? string? bytes?)) + (bytes/charset->bytes/utf-8 (-> bytes? string? bytes?)) + (bytes/utf-8->bytes/charset (-> bytes? string? bytes?)) + (bytes/charset->string (-> bytes? string? string?)) + (string->bytes/charset (-> string? string? bytes?)) + (read-bytes-avail (-> exact-positive-integer? input-port? bytes?)) + (read-byte-list (-> exact-positive-integer? input-port? bytes?)) + (read-bytes/timeout (->* (exact-positive-integer? input-port?) + ((or/c #f positive-number?)) + bytes?)) + (read-byte-list/timeout (->* (exact-positive-integer? input-port?) + ((or/c #f positive-number?)) + any)) + ) + |