1"""
2Read and write ZIP files.
3"""
4import struct, os, time, sys, shutil
5import binascii, cStringIO, stat
6import io
7import re
8import string
9
10try:
11    import zlib # We may need its compression method
12    crc32 = zlib.crc32
13except ImportError:
14    zlib = None
15    crc32 = binascii.crc32
16
# Names exported by a star-import of this module.
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
           "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
19
class BadZipfile(Exception):
    """Raised by this module when an archive cannot be interpreted.

    Examples in this file: no end-of-central-directory record, a truncated
    or wrongly signed central directory, a multi-disk archive, or a CRC-32
    mismatch while reading a member.
    """
    pass
22
23
class LargeZipFile(Exception):
    """
    Raised when writing a zipfile would require the ZIP64 extensions
    but those extensions are disabled.
    """
29
error = BadZipfile      # The exception raised by this module

# Sizes above ZIP64_LIMIT (2**31 - 1) cannot be written without the ZIP64
# extra field; see ZipInfo.FileHeader.
ZIP64_LIMIT = (1 << 31) - 1
# Both limits below equal 65535, the largest value of an unsigned 16-bit
# field.  NOTE(review): presumably they cap the entry count and the archive
# comment length; their uses are outside the part of the file reviewed here.
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported
40
41# Below are some formats and associated data for reading/writing headers using
42# the struct module.  The names and structures of headers/records are those used
43# in the PKWARE description of the ZIP file format:
44#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
45# (URL valid as of January 2008)
46
# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
# Layout (little-endian, standard sizes): 4-byte signature, four 16-bit
# fields, two 32-bit fields, one 16-bit field -- 22 bytes in total.
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the unpacked fields in the list returned by _EndRecData
_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
# (_EndRecData appends the comment and the record's file offset).
_ECD_COMMENT = 8
_ECD_LOCATION = 9
65
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
# Layout (little-endian, standard sizes): 4-byte signature, four 8-bit
# fields, four 16-bit fields, three 32-bit fields, five 16-bit fields and
# two 32-bit fields -- 46 bytes in total, 19 unpacked values.
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
92
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
# Layout (little-endian, standard sizes): 4-byte signature, two 8-bit
# fields, four 16-bit fields, three 32-bit fields and two 16-bit fields --
# 30 bytes in total, 12 unpacked values.
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the fields in an unpacked local file header
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
111
# The "Zip64 end of central directory locator" structure, magic number, and size
# Layout: 4-byte signature, 32-bit field, 64-bit field, 32-bit field
# (20 bytes); _EndRecData64 unpacks it as (sig, diskno, reloff, disks).
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
# Layout: 4-byte signature, one 64-bit field, two 16-bit fields, two 32-bit
# fields and four 64-bit fields -- 56 bytes in total, 10 unpacked values.
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the fields in an unpacked ZIP64 end-of-central-directory record
_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
133
def _check_zipfile(fp):
    """Return True if an end-of-central-directory record is found in *fp*.

    *fp* is a seekable file object.  An IOError raised while looking for
    the record is treated as "not a ZIP file" and yields False.
    """
    try:
        end_record = _EndRecData(fp)
    except IOError:
        return False
    # A non-empty record list means the magic number was located.
    return True if end_record else False
141
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    *filename* may be a path, or an object with a ``read`` attribute, which
    is then used directly as the file object.  Returns False when the file
    cannot be opened or read (IOError) or no ZIP end record is found.
    """
    is_zip = False
    try:
        if not hasattr(filename, "read"):
            # A path: open it ourselves and make sure it gets closed.
            with open(filename, "rb") as handle:
                is_zip = _check_zipfile(handle)
        else:
            is_zip = _check_zipfile(fp=filename)
    except IOError:
        pass
    return is_zip
157
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin:   seekable file object positioned anywhere.
    offset: position of the classic end-of-central-directory record,
            expressed relative to the end of the file (so it is negative).
    endrec: list built by _EndRecData; it is updated in place and returned.

    If no ZIP64 locator/record with the right signature and size is found
    immediately before the classic record, endrec is returned unchanged.
    Raises BadZipfile when the locator describes a multi-disk archive.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, _reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    # Some archivers record a total disk count of 0 for a single-volume
    # archive, so only a count above one indicates a spanned archive.
    if diskno != 0 or disks > 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    (sig, _sz, _create_version, _read_version, disk_num, disk_dir,
     dircount, dircount2, dirsize, diroffset) = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
199
200
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The result is a list holding the eight fields of the ZIP "End of
    central dir" record, then the archive comment, then the file offset at
    which the record starts.  The list is passed through _EndRecData64 so
    ZIP64 values replace the classic ones when a ZIP64 record is present.
    None is returned when no usable record can be located.
    """
    # Seek to the end to learn how big the file is.
    fpin.seek(0, 2)
    total_size = fpin.tell()

    # Fast path: with no archive comment the record is exactly the last
    # sizeEndCentDir bytes of the file.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    tail = fpin.read()
    plain_record = (len(tail) == sizeEndCentDir and
                    tail[0:4] == stringEndArchive and
                    tail[-2:] == b"\000\000")
    if plain_record:
        # Signature matches and the comment length is zero.
        fields = list(struct.unpack(structEndArchive, tail))
        fields.append("")
        fields.append(total_size - sizeEndCentDir)
        return _EndRecData64(fpin, -sizeEndCentDir, fields)

    # Slow path: either not a ZIP file, or the record is followed by a
    # comment of up to 64K.  Scan that trailing window for the last
    # occurrence of the signature, assuming it does not occur inside the
    # comment itself.
    search_from = max(total_size - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(search_from, 0)
    window = fpin.read()
    sig_pos = window.rfind(stringEndArchive)
    if sig_pos < 0:
        # No end-of-central-directory signature anywhere in the window.
        return None

    raw_record = window[sig_pos:sig_pos + sizeEndCentDir]
    if len(raw_record) != sizeEndCentDir:
        # Signature found but the record is cut short: corrupted file.
        return None

    fields = list(struct.unpack(structEndArchive, raw_record))
    comment_len = fields[_ECD_COMMENT_SIZE]     # as claimed by the zip file
    comment_begin = sig_pos + sizeEndCentDir
    fields.append(window[comment_begin:comment_begin + comment_len])
    fields.append(search_from + sig_pos)

    # The ZIP64 reader wants the record position relative to end of file.
    return _EndRecData64(fpin, search_from + sig_pos - total_size, fields)
260
261
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry description with default header values.

        filename:  member name; cut at the first NUL byte and normalised to
                   use "/" as the directory separator.
        date_time: (year, month, day, hour, min, sec) tuple.

        Raises ValueError if the year is before 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self, zip64=None):
        """Return the per-file header as a string.

        zip64: True appends a ZIP64 extra field unconditionally; False
               forbids it; None (default) appends it only when one of the
               sizes exceeds ZIP64_LIMIT.

        Raises LargeZipFile when a size exceeds ZIP64_LIMIT and zip64 is
        false.  When the ZIP64 form is needed, extract_version and
        create_version are raised to at least 45 on this object.
        """
        dt = self.date_time
        # Pack the timestamp into the MS-DOS date and time words
        # (seconds are stored with two-second resolution).
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            # Extra field: header id 1, payload length, then both sizes as
            # 64-bit values.
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Bump the creator version from its own previous value, not
            # from extract_version.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        A unicode name is stored as ASCII when possible; otherwise it is
        stored as UTF-8 and flag bit 0x800 is set.  Byte-string names are
        returned untouched.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the filename, decoded from UTF-8 if flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply ZIP64 values from the extra field to this entry.

        Walks the (type, length, payload) records in self.extra.  For a
        record of type 1 the 64-bit values replace, in order, file_size,
        compress_size and header_offset wherever those currently hold the
        0xffffffff placeholder.  Records of other types are skipped.

        Raises RuntimeError if a type-1 record has an unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s" % (ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            # Advance past this record's 4-byte header and its payload.
            extra = extra[ln+4:]
418
419
420class _ZipDecrypter:
421    """Class to handle decryption of files stored within a ZIP archive.
422
423    ZIP supports a password-based form of encryption. Even though known
424    plaintext attacks have been found against it, it is still useful
425    to be able to get data out of such a file.
426
427    Usage:
428        zd = _ZipDecrypter(mypwd)
429        plain_char = zd(cypher_char)
430        plain_text = map(zd, cypher_text)
431    """
432
433    def _GenerateCRCTable():
434        """Generate a CRC-32 table.
435
436        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
437        internal keys. We noticed that a direct implementation is faster than
438        relying on binascii.crc32().
439        """
440        poly = 0xedb88320
441        table = [0] * 256
442        for i in range(256):
443            crc = i
444            for j in range(8):
445                if crc & 1:
446                    crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
447                else:
448                    crc = ((crc >> 1) & 0x7FFFFFFF)
449            table[i] = crc
450        return table
451    crctable = _GenerateCRCTable()
452
453    def _crc32(self, ch, crc):
454        """Compute the CRC32 primitive on one byte."""
455        return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
456
457    def __init__(self, pwd):
458        self.key0 = 305419896
459        self.key1 = 591751049
460        self.key2 = 878082192
461        for p in pwd:
462            self._UpdateKeys(p)
463
464    def _UpdateKeys(self, c):
465        self.key0 = self._crc32(c, self.key0)
466        self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
467        self.key1 = (self.key1 * 134775813 + 1) & 4294967295
468        self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
469
470    def __call__(self, c):
471        """Decrypt a single character."""
472        c = ord(c)
473        k = self.key2 | 2
474        c = c ^ (((k * (k^1)) >> 8) & 255)
475        c = chr(c)
476        self._UpdateKeys(c)
477        return c
478
479
# Human-readable names for ZIP compression method codes.  ZipExtFile uses
# this table only to describe a method it cannot handle when raising
# NotImplementedError; codes missing here are reported by number alone.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
499
500
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.  The parentheses matter: without
    # them "1 << 31 - 1" is parsed as 1 << 30.
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
            close_fileobj=False):
        """Prepare to read one member from *fileobj*.

        fileobj:       file object from which the member's stored bytes
                       are read.
        mode:          mode string; a 'U' in it enables universal newlines.
        zipinfo:       entry description (compress_type, compress_size,
                       filename and, if present, CRC are read from it).
        decrypter:     optional callable applied to every byte read.
        close_fileobj: if true, close() also closes *fileobj*.

        Raises NotImplementedError for a compression method other than
        ZIP_STORED or ZIP_DEFLATED.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_size = zipinfo.compress_size
        self._compress_left = zipinfo.compress_size

        if self._compress_type == ZIP_DEFLATED:
            # Negative window size: raw deflate stream without zlib header.
            self._decompressor = zlib.decompressobj(-15)
        elif self._compress_type != ZIP_STORED:
            descr = compressor_names.get(self._compress_type)
            if descr:
                raise NotImplementedError("compression type %d (%s)" % (self._compress_type, descr))
            else:
                raise NotImplementedError("compression type %d" % (self._compress_type,))
        self._unconsumed = ''

        self._readbuffer = ''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find('\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = ''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == '':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + '\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            # read() advanced the position; undo that, either by putting
            # the chunk back in front of the buffer or by rewinding.
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Report that this object supports reading."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and
        returned until EOF is reached.
        """
        if n is None:
            n = -1
        # Collect the pieces and join once instead of growing a string.
        chunks = []
        collected = 0
        while True:
            if n < 0:
                data = self.read1(n)
            elif n > collected:
                data = self.read1(n - collected)
            else:
                break
            if len(data) == 0:
                break
            chunks.append(data)
            collected += len(data)
        return ''.join(chunks)

    def _update_crc(self, newdata, eof):
        """Fold *newdata* into the running CRC and verify it at *eof*.

        Raises BadZipfile if, at end of data, the running CRC-32 differs
        from the expected one.  Does nothing without an expected CRC.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipfile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call.

        A value of None or a negative n means "as much as possible".
        """

        # Simplify algorithm (branching) by transforming negative n to large n.
        # None is tested first so it is never compared with an integer.
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        len_readbuffer = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
            nbytes = n - len_readbuffer - len(self._unconsumed)
            nbytes = max(nbytes, self.MIN_READ_SIZE)
            nbytes = min(nbytes, self._compress_left)

            data = self._fileobj.read(nbytes)
            self._compress_left -= len(data)

            if data and self._decrypter is not None:
                data = ''.join(map(self._decrypter, data))

            if self._compress_type == ZIP_STORED:
                self._update_crc(data, eof=(self._compress_left==0))
                self._readbuffer = self._readbuffer[self._offset:] + data
                self._offset = 0
            else:
                # Prepare deflated bytes for decompression.
                self._unconsumed += data

        # Handle unconsumed data.
        if (len(self._unconsumed) > 0 and n > len_readbuffer and
            self._compress_type == ZIP_DEFLATED):
            data = self._decompressor.decompress(
                self._unconsumed,
                max(n - len_readbuffer, self.MIN_READ_SIZE)
            )

            self._unconsumed = self._decompressor.unconsumed_tail
            eof = len(self._unconsumed) == 0 and self._compress_left == 0
            if eof:
                data += self._decompressor.flush()

            self._update_crc(data, eof=eof)
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0

        # Read from buffer.
        data = self._readbuffer[self._offset: self._offset + n]
        self._offset += len(data)
        return data

    def close(self):
        """Close this object, and the underlying file if we own it."""
        try :
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super(ZipExtFile, self).close()
707
708
709class ZipFile(object):
710    """ Class with methods to open, read, write, close, list zip files.
711
712    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
713
714    file: Either the path to the file, or a file-like object.
715          If it is a path, the file will be opened and closed by ZipFile.
716    mode: The mode can be either read "r", write "w" or append "a".
717    compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
718    allowZip64: if True ZipFile will create files with ZIP64 extensions when
719                needed, otherwise it will raise an exception when this would
720                be necessary.
721
722    """
723
724    fp = None                   # Set here since __del__ checks it
725
726    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
727        """Open the ZIP file with mode read "r", write "w" or append "a"."""
728        if mode not in ("r", "w", "a"):
729            raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
730
731        if compression == ZIP_STORED:
732            pass
733        elif compression == ZIP_DEFLATED:
734            if not zlib:
735                raise RuntimeError,\
736                      "Compression requires the (missing) zlib module"
737        else:
738            raise RuntimeError, "That compression method is not supported"
739
740        self._allowZip64 = allowZip64
741        self._didModify = False
742        self.debug = 0  # Level of printing: 0 through 3
743        self.NameToInfo = {}    # Find file info given name
744        self.filelist = []      # List of ZipInfo instances for archive
745        self.compression = compression  # Method of compression
746        self.mode = key = mode.replace('b', '')[0]
747        self.pwd = None
748        self._comment = ''
749
750        # Check if we were passed a file-like object
751        if isinstance(file, basestring):
752            self._filePassed = 0
753            self.filename = file
754            modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
755            try:
756                self.fp = open(file, modeDict[mode])
757            except IOError:
758                if mode == 'a':
759                    mode = key = 'w'
760                    self.fp = open(file, modeDict[mode])
761                else:
762                    raise
763        else:
764            self._filePassed = 1
765            self.fp = file
766            self.filename = getattr(file, 'name', None)
767
768        try:
769            if key == 'r':
770                self._RealGetContents()
771            elif key == 'w':
772                # set the modified flag so central directory gets written
773                # even if no files are added to the archive
774                self._didModify = True
775            elif key == 'a':
776                try:
777                    # See if file is a zip file
778                    self._RealGetContents()
779                    # seek to start of directory and overwrite
780                    self.fp.seek(self.start_dir, 0)
781                except BadZipfile:
782                    # file is not a zip file, just append
783                    self.fp.seek(0, 2)
784
785                    # set the modified flag so central directory gets written
786                    # even if no files are added to the archive
787                    self._didModify = True
788            else:
789                raise RuntimeError('Mode must be "r", "w" or "a"')
790        except:
791            fp = self.fp
792            self.fp = None
793            if not self._filePassed:
794                fp.close()
795            raise
796
    def __enter__(self):
        """Context-manager entry: return this object unchanged."""
        return self
799
    def __exit__(self, type, value, traceback):
        """Context-manager exit: call close(); exceptions are not suppressed."""
        self.close()
802
803    def _RealGetContents(self):
804        """Read in the table of contents for the ZIP file."""
805        fp = self.fp
806        try:
807            endrec = _EndRecData(fp)
808        except IOError:
809            raise BadZipfile("File is not a zip file")
810        if not endrec:
811            raise BadZipfile, "File is not a zip file"
812        if self.debug > 1:
813            print endrec
814        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
815        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
816        self._comment = endrec[_ECD_COMMENT]    # archive comment
817
818        # "concat" is zero, unless zip was concatenated to another file
819        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
820        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
821            # If Zip64 extension structures are present, account for them
822            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
823
824        if self.debug > 2:
825            inferred = concat + offset_cd
826            print "given, inferred, offset", offset_cd, inferred, concat
827        # self.start_dir:  Position of start of central directory
828        self.start_dir = offset_cd + concat
829        fp.seek(self.start_dir, 0)
830        data = fp.read(size_cd)
831        fp = cStringIO.StringIO(data)
832        total = 0
833        while total < size_cd:
834            centdir = fp.read(sizeCentralDir)
835            if len(centdir) != sizeCentralDir:
836                raise BadZipfile("Truncated central directory")
837            centdir = struct.unpack(structCentralDir, centdir)
838            if centdir[_CD_SIGNATURE] != stringCentralDir:
839                raise BadZipfile("Bad magic number for central directory")
840            if self.debug > 2:
841                print centdir
842            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
843            # Create ZipInfo instance to store file information
844            x = ZipInfo(filename)
845            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
846            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
847            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
848            (x.create_version, x.create_system, x.extract_version, x.reserved,
849                x.flag_bits, x.compress_type, t, d,
850                x.CRC, x.compress_size, x.file_size) = centdir[1:12]
851            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
852            # Convert date/time code to (year, month, day, hour, min, sec)
853            x._raw_time = t
854            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
855                                     t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
856
857            x._decodeExtra()
858            x.header_offset = x.header_offset + concat
859            x.filename = x._decodeFilename()
860            self.filelist.append(x)
861            self.NameToInfo[x.filename] = x
862
863            # update total bytes read from central directory
864            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
865                     + centdir[_CD_EXTRA_FIELD_LENGTH]
866                     + centdir[_CD_COMMENT_LENGTH])
867
868            if self.debug > 2:
869                print "total", total
870
871
872    def namelist(self):
873        """Return a list of file names in the archive."""
874        l = []
875        for data in self.filelist:
876            l.append(data.filename)
877        return l
878
879    def infolist(self):
880        """Return a list of class ZipInfo instances for files in the
881        archive."""
882        return self.filelist
883
884    def printdir(self):
885        """Print a table of contents for the zip file."""
886        print "%-46s %19s %12s" % ("File Name", "Modified    ", "Size")
887        for zinfo in self.filelist:
888            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
889            print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
890
891    def testzip(self):
892        """Read all the files and check the CRC."""
893        chunk_size = 2 ** 20
894        for zinfo in self.filelist:
895            try:
896                # Read by chunks, to avoid an OverflowError or a
897                # MemoryError with very large embedded files.
898                with self.open(zinfo.filename, "r") as f:
899                    while f.read(chunk_size):     # Check CRC-32
900                        pass
901            except BadZipfile:
902                return zinfo.filename
903
904    def getinfo(self, name):
905        """Return the instance of ZipInfo given 'name'."""
906        info = self.NameToInfo.get(name)
907        if info is None:
908            raise KeyError(
909                'There is no item named %r in the archive' % name)
910
911        return info
912
913    def setpassword(self, pwd):
914        """Set default password for encrypted files."""
915        self.pwd = pwd
916
917    @property
918    def comment(self):
919        """The comment text associated with the ZIP file."""
920        return self._comment
921
922    @comment.setter
923    def comment(self, comment):
924        # check for valid comment length
925        if len(comment) > ZIP_MAX_COMMENT:
926            import warnings
927            warnings.warn('Archive comment is too long; truncating to %d bytes'
928                          % ZIP_MAX_COMMENT, stacklevel=2)
929            comment = comment[:ZIP_MAX_COMMENT]
930        self._comment = comment
931        self._didModify = True
932
933    def read(self, name, pwd=None):
934        """Return file bytes (as a string) for name."""
935        return self.open(name, "r", pwd).read()
936
937    def open(self, name, mode="r", pwd=None):
938        """Return file-like object for 'name'."""
939        if mode not in ("r", "U", "rU"):
940            raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
941        if not self.fp:
942            raise RuntimeError, \
943                  "Attempt to read ZIP archive that was already closed"
944
945        # Only open a new file for instances where we were not
946        # given a file object in the constructor
947        if self._filePassed:
948            zef_file = self.fp
949            should_close = False
950        else:
951            zef_file = open(self.filename, 'rb')
952            should_close = True
953
954        try:
955            # Make sure we have an info object
956            if isinstance(name, ZipInfo):
957                # 'name' is already an info object
958                zinfo = name
959            else:
960                # Get info object for name
961                zinfo = self.getinfo(name)
962
963            zef_file.seek(zinfo.header_offset, 0)
964
965            # Skip the file header:
966            fheader = zef_file.read(sizeFileHeader)
967            if len(fheader) != sizeFileHeader:
968                raise BadZipfile("Truncated file header")
969            fheader = struct.unpack(structFileHeader, fheader)
970            if fheader[_FH_SIGNATURE] != stringFileHeader:
971                raise BadZipfile("Bad magic number for file header")
972
973            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
974            if fheader[_FH_EXTRA_FIELD_LENGTH]:
975                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
976
977            if fname != zinfo.orig_filename:
978                raise BadZipfile, \
979                        'File name in directory "%s" and header "%s" differ.' % (
980                            zinfo.orig_filename, fname)
981
982            # check for encrypted flag & handle password
983            is_encrypted = zinfo.flag_bits & 0x1
984            zd = None
985            if is_encrypted:
986                if not pwd:
987                    pwd = self.pwd
988                if not pwd:
989                    raise RuntimeError, "File %s is encrypted, " \
990                        "password required for extraction" % name
991
992                zd = _ZipDecrypter(pwd)
993                # The first 12 bytes in the cypher stream is an encryption header
994                #  used to strengthen the algorithm. The first 11 bytes are
995                #  completely random, while the 12th contains the MSB of the CRC,
996                #  or the MSB of the file time depending on the header type
997                #  and is used to check the correctness of the password.
998                bytes = zef_file.read(12)
999                h = map(zd, bytes[0:12])
1000                if zinfo.flag_bits & 0x8:
1001                    # compare against the file type from extended local headers
1002                    check_byte = (zinfo._raw_time >> 8) & 0xff
1003                else:
1004                    # compare against the CRC otherwise
1005                    check_byte = (zinfo.CRC >> 24) & 0xff
1006                if ord(h[11]) != check_byte:
1007                    raise RuntimeError("Bad password for file", name)
1008
1009            return ZipExtFile(zef_file, mode, zinfo, zd,
1010                    close_fileobj=should_close)
1011        except:
1012            if should_close:
1013                zef_file.close()
1014            raise
1015
1016    def extract(self, member, path=None, pwd=None):
1017        """Extract a member from the archive to the current working directory,
1018           using its full name. Its file information is extracted as accurately
1019           as possible. `member' may be a filename or a ZipInfo object. You can
1020           specify a different directory using `path'.
1021        """
1022        if not isinstance(member, ZipInfo):
1023            member = self.getinfo(member)
1024
1025        if path is None:
1026            path = os.getcwd()
1027
1028        return self._extract_member(member, path, pwd)
1029
1030    def extractall(self, path=None, members=None, pwd=None):
1031        """Extract all members from the archive to the current working
1032           directory. `path' specifies a different directory to extract to.
1033           `members' is optional and must be a subset of the list returned
1034           by namelist().
1035        """
1036        if members is None:
1037            members = self.namelist()
1038
1039        for zipinfo in members:
1040            self.extract(zipinfo, path, pwd)
1041
1042    def _extract_member(self, member, targetpath, pwd):
1043        """Extract the ZipInfo object 'member' to a physical
1044           file on the path targetpath.
1045        """
1046        # build the destination pathname, replacing
1047        # forward slashes to platform specific separators.
1048        arcname = member.filename.replace('/', os.path.sep)
1049
1050        if os.path.altsep:
1051            arcname = arcname.replace(os.path.altsep, os.path.sep)
1052        # interpret absolute pathname as relative, remove drive letter or
1053        # UNC path, redundant separators, "." and ".." components.
1054        arcname = os.path.splitdrive(arcname)[1]
1055        arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
1056                    if x not in ('', os.path.curdir, os.path.pardir))
1057        if os.path.sep == '\\':
1058            # filter illegal characters on Windows
1059            illegal = ':<>|"?*'
1060            if isinstance(arcname, unicode):
1061                table = {ord(c): ord('_') for c in illegal}
1062            else:
1063                table = string.maketrans(illegal, '_' * len(illegal))
1064            arcname = arcname.translate(table)
1065            # remove trailing dots
1066            arcname = (x.rstrip('.') for x in arcname.split(os.path.sep))
1067            arcname = os.path.sep.join(x for x in arcname if x)
1068
1069        targetpath = os.path.join(targetpath, arcname)
1070        targetpath = os.path.normpath(targetpath)
1071
1072        # Create all upper directories if necessary.
1073        upperdirs = os.path.dirname(targetpath)
1074        if upperdirs and not os.path.exists(upperdirs):
1075            os.makedirs(upperdirs)
1076
1077        if member.filename[-1] == '/':
1078            if not os.path.isdir(targetpath):
1079                os.mkdir(targetpath)
1080            return targetpath
1081
1082        with self.open(member, pwd=pwd) as source, \
1083             file(targetpath, "wb") as target:
1084            shutil.copyfileobj(source, target)
1085
1086        return targetpath
1087
1088    def _writecheck(self, zinfo):
1089        """Check for errors before writing a file to the archive."""
1090        if zinfo.filename in self.NameToInfo:
1091            import warnings
1092            warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
1093        if self.mode not in ("w", "a"):
1094            raise RuntimeError, 'write() requires mode "w" or "a"'
1095        if not self.fp:
1096            raise RuntimeError, \
1097                  "Attempt to write ZIP archive that was already closed"
1098        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
1099            raise RuntimeError, \
1100                  "Compression requires the (missing) zlib module"
1101        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
1102            raise RuntimeError, \
1103                  "That compression method is not supported"
1104        if not self._allowZip64:
1105            requires_zip64 = None
1106            if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1107                requires_zip64 = "Files count"
1108            elif zinfo.file_size > ZIP64_LIMIT:
1109                requires_zip64 = "Filesize"
1110            elif zinfo.header_offset > ZIP64_LIMIT:
1111                requires_zip64 = "Zipfile size"
1112            if requires_zip64:
1113                raise LargeZipFile(requires_zip64 +
1114                                   " would require ZIP64 extensions")
1115
1116    def write(self, filename, arcname=None, compress_type=None):
1117        """Put the bytes from filename into the archive under the name
1118        arcname."""
1119        if not self.fp:
1120            raise RuntimeError(
1121                  "Attempt to write to ZIP archive that was already closed")
1122
1123        st = os.stat(filename)
1124        isdir = stat.S_ISDIR(st.st_mode)
1125        mtime = time.localtime(st.st_mtime)
1126        date_time = mtime[0:6]
1127        # Create ZipInfo instance to store file information
1128        if arcname is None:
1129            arcname = filename
1130        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
1131        while arcname[0] in (os.sep, os.altsep):
1132            arcname = arcname[1:]
1133        if isdir:
1134            arcname += '/'
1135        zinfo = ZipInfo(arcname, date_time)
1136        zinfo.external_attr = (st[0] & 0xFFFF) << 16L      # Unix attributes
1137        if compress_type is None:
1138            zinfo.compress_type = self.compression
1139        else:
1140            zinfo.compress_type = compress_type
1141
1142        zinfo.file_size = st.st_size
1143        zinfo.flag_bits = 0x00
1144        zinfo.header_offset = self.fp.tell()    # Start of header bytes
1145
1146        self._writecheck(zinfo)
1147        self._didModify = True
1148
1149        if isdir:
1150            zinfo.file_size = 0
1151            zinfo.compress_size = 0
1152            zinfo.CRC = 0
1153            zinfo.external_attr |= 0x10  # MS-DOS directory flag
1154            self.filelist.append(zinfo)
1155            self.NameToInfo[zinfo.filename] = zinfo
1156            self.fp.write(zinfo.FileHeader(False))
1157            return
1158
1159        with open(filename, "rb") as fp:
1160            # Must overwrite CRC and sizes with correct data later
1161            zinfo.CRC = CRC = 0
1162            zinfo.compress_size = compress_size = 0
1163            # Compressed size can be larger than uncompressed size
1164            zip64 = self._allowZip64 and \
1165                    zinfo.file_size * 1.05 > ZIP64_LIMIT
1166            self.fp.write(zinfo.FileHeader(zip64))
1167            if zinfo.compress_type == ZIP_DEFLATED:
1168                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1169                     zlib.DEFLATED, -15)
1170            else:
1171                cmpr = None
1172            file_size = 0
1173            while 1:
1174                buf = fp.read(1024 * 8)
1175                if not buf:
1176                    break
1177                file_size = file_size + len(buf)
1178                CRC = crc32(buf, CRC) & 0xffffffff
1179                if cmpr:
1180                    buf = cmpr.compress(buf)
1181                    compress_size = compress_size + len(buf)
1182                self.fp.write(buf)
1183        if cmpr:
1184            buf = cmpr.flush()
1185            compress_size = compress_size + len(buf)
1186            self.fp.write(buf)
1187            zinfo.compress_size = compress_size
1188        else:
1189            zinfo.compress_size = file_size
1190        zinfo.CRC = CRC
1191        zinfo.file_size = file_size
1192        if not zip64 and self._allowZip64:
1193            if file_size > ZIP64_LIMIT:
1194                raise RuntimeError('File size has increased during compressing')
1195            if compress_size > ZIP64_LIMIT:
1196                raise RuntimeError('Compressed size larger than uncompressed size')
1197        # Seek backwards and write file header (which will now include
1198        # correct CRC and file sizes)
1199        position = self.fp.tell()       # Preserve current position in file
1200        self.fp.seek(zinfo.header_offset, 0)
1201        self.fp.write(zinfo.FileHeader(zip64))
1202        self.fp.seek(position, 0)
1203        self.filelist.append(zinfo)
1204        self.NameToInfo[zinfo.filename] = zinfo
1205
1206    def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
1207        """Write a file into the archive.  The contents is the string
1208        'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
1209        the name of the file in the archive."""
1210        if not isinstance(zinfo_or_arcname, ZipInfo):
1211            zinfo = ZipInfo(filename=zinfo_or_arcname,
1212                            date_time=time.localtime(time.time())[:6])
1213
1214            zinfo.compress_type = self.compression
1215            if zinfo.filename[-1] == '/':
1216                zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
1217                zinfo.external_attr |= 0x10           # MS-DOS directory flag
1218            else:
1219                zinfo.external_attr = 0o600 << 16     # ?rw-------
1220        else:
1221            zinfo = zinfo_or_arcname
1222
1223        if not self.fp:
1224            raise RuntimeError(
1225                  "Attempt to write to ZIP archive that was already closed")
1226
1227        if compress_type is not None:
1228            zinfo.compress_type = compress_type
1229
1230        zinfo.file_size = len(bytes)            # Uncompressed size
1231        zinfo.header_offset = self.fp.tell()    # Start of header bytes
1232        self._writecheck(zinfo)
1233        self._didModify = True
1234        zinfo.CRC = crc32(bytes) & 0xffffffff       # CRC-32 checksum
1235        if zinfo.compress_type == ZIP_DEFLATED:
1236            co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1237                 zlib.DEFLATED, -15)
1238            bytes = co.compress(bytes) + co.flush()
1239            zinfo.compress_size = len(bytes)    # Compressed size
1240        else:
1241            zinfo.compress_size = zinfo.file_size
1242        zip64 = zinfo.file_size > ZIP64_LIMIT or \
1243                zinfo.compress_size > ZIP64_LIMIT
1244        if zip64 and not self._allowZip64:
1245            raise LargeZipFile("Filesize would require ZIP64 extensions")
1246        self.fp.write(zinfo.FileHeader(zip64))
1247        self.fp.write(bytes)
1248        if zinfo.flag_bits & 0x08:
1249            # Write CRC and file sizes after the file data
1250            fmt = '<LQQ' if zip64 else '<LLL'
1251            self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
1252                  zinfo.file_size))
1253        self.fp.flush()
1254        self.filelist.append(zinfo)
1255        self.NameToInfo[zinfo.filename] = zinfo
1256
1257    def __del__(self):
1258        """Call the "close()" method in case the user forgot."""
1259        self.close()
1260
    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records.

        Does nothing when the archive is already closed (self.fp is None).
        When the archive was opened in mode "w" or "a" and has been
        modified, the central directory, the ZIP64 end records (only if
        required) and the end-of-archive record followed by the archive
        comment are written first.  self.fp is always reset to None, and
        the underlying file is closed unless it was passed in by the
        caller (self._filePassed).

        Raises LargeZipFile when ZIP64 end records are required but
        self._allowZip64 is false.
        """
        if self.fp is None:
            return

        try:
            if self.mode in ("w", "a") and self._didModify: # write ending records
                # pos1: where the central directory starts
                pos1 = self.fp.tell()
                for zinfo in self.filelist:         # write central directory
                    # Pack the timestamp into the 16-bit DOS date and time
                    # fields (seconds are stored halved).
                    dt = zinfo.date_time
                    dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                    dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                    # Values above ZIP64_LIMIT are collected in 'extra' and
                    # replaced by 0xffffffff in the fixed-size record.
                    extra = []
                    if zinfo.file_size > ZIP64_LIMIT \
                            or zinfo.compress_size > ZIP64_LIMIT:
                        extra.append(zinfo.file_size)
                        extra.append(zinfo.compress_size)
                        file_size = 0xffffffff
                        compress_size = 0xffffffff
                    else:
                        file_size = zinfo.file_size
                        compress_size = zinfo.compress_size

                    if zinfo.header_offset > ZIP64_LIMIT:
                        extra.append(zinfo.header_offset)
                        header_offset = 0xffffffffL
                    else:
                        header_offset = zinfo.header_offset

                    extra_data = zinfo.extra
                    if extra:
                        # Append a ZIP64 field to the extra's
                        # (header id 1, 8 bytes per collected value; it is
                        # placed in front of the existing extra data)
                        extra_data = struct.pack(
                                '<HH' + 'Q'*len(extra),
                                1, 8*len(extra), *extra) + extra_data

                        # Entries carrying the ZIP64 field advertise at
                        # least version 45.
                        extract_version = max(45, zinfo.extract_version)
                        create_version = max(45, zinfo.create_version)
                    else:
                        extract_version = zinfo.extract_version
                        create_version = zinfo.create_version

                    try:
                        filename, flag_bits = zinfo._encodeFilenameFlags()
                        centdir = struct.pack(structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                    except DeprecationWarning:
                        # Debugging aid: dump the values that were being
                        # packed to stderr, then re-raise.
                        # NOTE(review): presumably only reachable when
                        # warnings are turned into errors -- confirm.
                        print >>sys.stderr, (structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                        raise
                    # One central directory entry: fixed record, then the
                    # variable-length name, extra data and comment.
                    self.fp.write(centdir)
                    self.fp.write(filename)
                    self.fp.write(extra_data)
                    self.fp.write(zinfo.comment)

                # pos2: first byte after the central directory
                pos2 = self.fp.tell()
                # Write end-of-zip-archive record
                centDirCount = len(self.filelist)
                centDirSize = pos2 - pos1
                centDirOffset = pos1
                requires_zip64 = None
                if centDirCount > ZIP_FILECOUNT_LIMIT:
                    requires_zip64 = "Files count"
                elif centDirOffset > ZIP64_LIMIT:
                    requires_zip64 = "Central directory offset"
                elif centDirSize > ZIP64_LIMIT:
                    requires_zip64 = "Central directory size"
                if requires_zip64:
                    # Need to write the ZIP64 end-of-archive records
                    if not self._allowZip64:
                        raise LargeZipFile(requires_zip64 +
                                           " would require ZIP64 extensions")
                    zip64endrec = struct.pack(
                            structEndArchive64, stringEndArchive64,
                            44, 45, 45, 0, 0, centDirCount, centDirCount,
                            centDirSize, centDirOffset)
                    self.fp.write(zip64endrec)

                    # The locator records pos2, the position at which the
                    # ZIP64 end record was just written.
                    zip64locrec = struct.pack(
                            structEndArchive64Locator,
                            stringEndArchive64Locator, 0, pos2, 1)
                    self.fp.write(zip64locrec)
                    # Clamp the values to what the fields of the classic
                    # end record can hold.
                    centDirCount = min(centDirCount, 0xFFFF)
                    centDirSize = min(centDirSize, 0xFFFFFFFF)
                    centDirOffset = min(centDirOffset, 0xFFFFFFFF)

                endrec = struct.pack(structEndArchive, stringEndArchive,
                                    0, 0, centDirCount, centDirCount,
                                    centDirSize, centDirOffset, len(self._comment))
                self.fp.write(endrec)
                self.fp.write(self._comment)
                self.fp.flush()
        finally:
            # Mark the archive closed even if writing the records failed;
            # a file object supplied by the caller is left open.
            fp = self.fp
            self.fp = None
            if not self._filePassed:
                fp.close()
1371
1372
1373class PyZipFile(ZipFile):
1374    """Class to create ZIP archives with Python library files and packages."""
1375
1376    def writepy(self, pathname, basename = ""):
1377        """Add all files from "pathname" to the ZIP archive.
1378
1379        If pathname is a package directory, search the directory and
1380        all package subdirectories recursively for all *.py and enter
1381        the modules into the archive.  If pathname is a plain
1382        directory, listdir *.py and enter all modules.  Else, pathname
1383        must be a Python *.py file and the module will be put into the
1384        archive.  Added modules are always module.pyo or module.pyc.
1385        This method will compile the module.py into module.pyc if
1386        necessary.
1387        """
1388        dir, name = os.path.split(pathname)
1389        if os.path.isdir(pathname):
1390            initname = os.path.join(pathname, "__init__.py")
1391            if os.path.isfile(initname):
1392                # This is a package directory, add it
1393                if basename:
1394                    basename = "%s/%s" % (basename, name)
1395                else:
1396                    basename = name
1397                if self.debug:
1398                    print "Adding package in", pathname, "as", basename
1399                fname, arcname = self._get_codename(initname[0:-3], basename)
1400                if self.debug:
1401                    print "Adding", arcname
1402                self.write(fname, arcname)
1403                dirlist = os.listdir(pathname)
1404                dirlist.remove("__init__.py")
1405                # Add all *.py files and package subdirectories
1406                for filename in dirlist:
1407                    path = os.path.join(pathname, filename)
1408                    root, ext = os.path.splitext(filename)
1409                    if os.path.isdir(path):
1410                        if os.path.isfile(os.path.join(path, "__init__.py")):
1411                            # This is a package directory, add it
1412                            self.writepy(path, basename)  # Recursive call
1413                    elif ext == ".py":
1414                        fname, arcname = self._get_codename(path[0:-3],
1415                                         basename)
1416                        if self.debug:
1417                            print "Adding", arcname
1418                        self.write(fname, arcname)
1419            else:
1420                # This is NOT a package directory, add its files at top level
1421                if self.debug:
1422                    print "Adding files from directory", pathname
1423                for filename in os.listdir(pathname):
1424                    path = os.path.join(pathname, filename)
1425                    root, ext = os.path.splitext(filename)
1426                    if ext == ".py":
1427                        fname, arcname = self._get_codename(path[0:-3],
1428                                         basename)
1429                        if self.debug:
1430                            print "Adding", arcname
1431                        self.write(fname, arcname)
1432        else:
1433            if pathname[-3:] != ".py":
1434                raise RuntimeError, \
1435                      'Files added with writepy() must end with ".py"'
1436            fname, arcname = self._get_codename(pathname[0:-3], basename)
1437            if self.debug:
1438                print "Adding file", arcname
1439            self.write(fname, arcname)
1440
1441    def _get_codename(self, pathname, basename):
1442        """Return (filename, archivename) for the path.
1443
1444        Given a module name path, return the correct file path and
1445        archive name, compiling if necessary.  For example, given
1446        /python/lib/string, return (/python/lib/string.pyc, string).
1447        """
1448        file_py  = pathname + ".py"
1449        file_pyc = pathname + ".pyc"
1450        file_pyo = pathname + ".pyo"
1451        if os.path.isfile(file_pyo) and \
1452                            os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
1453            fname = file_pyo    # Use .pyo file
1454        elif not os.path.isfile(file_pyc) or \
1455             os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
1456            import py_compile
1457            if self.debug:
1458                print "Compiling", file_py
1459            try:
1460                py_compile.compile(file_py, file_pyc, None, True)
1461            except py_compile.PyCompileError,err:
1462                print err.msg
1463            fname = file_pyc
1464        else:
1465            fname = file_pyc
1466        archivename = os.path.split(fname)[1]
1467        if basename:
1468            archivename = "%s/%s" % (basename, archivename)
1469        return (fname, archivename)
1470
1471
def main(args = None):
    """Command-line interface: list (-l), test (-t), extract (-e) or
    create (-c) a ZIP archive.

    'args' defaults to sys.argv[1:].  On an unknown option or a wrong
    number of arguments the usage text is printed and the process exits
    with status 1.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)

    def usage_exit():
        # Shared failure path for every argument error.
        print(USAGE)
        sys.exit(1)

    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        usage_exit()

    command = args[0]

    if command == '-l':
        if len(args) != 2:
            usage_exit()
        with ZipFile(args[1], 'r') as zf:
            zf.printdir()

    elif command == '-t':
        if len(args) != 2:
            usage_exit()
        with ZipFile(args[1], 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif command == '-e':
        if len(args) != 3:
            usage_exit()

        with ZipFile(args[1], 'r') as zf:
            zf.extractall(args[2])

    elif command == '-c':
        if len(args) < 3:
            usage_exit()

        def add_to_zip(zf, path, zippath):
            # Files are stored deflated; directories are stored as an
            # entry (unless they map to the archive root) and walked
            # recursively.  Anything else is ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                if zippath:
                    zf.write(path, zippath)
                for nm in os.listdir(path):
                    add_to_zip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))

        with ZipFile(args[1], 'w', allowZip64=True) as zf:
            for path in args[2:]:
                # Store each source under its last path component.
                zippath = os.path.basename(path)
                if not zippath:
                    zippath = os.path.basename(os.path.dirname(path))
                if zippath in ('', os.curdir, os.pardir):
                    zippath = ''
                add_to_zip(zf, path, zippath)
1537
# Run the command-line interface when the module is executed as a script.
if __name__ == "__main__":
    main()
1540