|
||||
0001 # -*- encoding: utf-8 -*- 0002 # back ported from CPython 3 0003 # A. HISTORY OF THE SOFTWARE 0004 # ========================== 0005 # 0006 # Python was created in the early 1990s by Guido van Rossum at Stichting 0007 # Mathematisch Centrum (CWI, see http://www.cwi.nl) in the Netherlands 0008 # as a successor of a language called ABC. Guido remains Python's 0009 # principal author, although it includes many contributions from others. 0010 # 0011 # In 1995, Guido continued his work on Python at the Corporation for 0012 # National Research Initiatives (CNRI, see http://www.cnri.reston.va.us) 0013 # in Reston, Virginia where he released several versions of the 0014 # software. 0015 # 0016 # In May 2000, Guido and the Python core development team moved to 0017 # BeOpen.com to form the BeOpen PythonLabs team. In October of the same 0018 # year, the PythonLabs team moved to Digital Creations (now Zope 0019 # Corporation, see http://www.zope.com). In 2001, the Python Software 0020 # Foundation (PSF, see http://www.python.org/psf/) was formed, a 0021 # non-profit organization created specifically to own Python-related 0022 # Intellectual Property. Zope Corporation is a sponsoring member of 0023 # the PSF. 0024 # 0025 # All Python releases are Open Source (see http://www.opensource.org for 0026 # the Open Source Definition). Historically, most, but not all, Python 0027 # releases have also been GPL-compatible; the table below summarizes 0028 # the various releases. 0029 # 0030 # Release Derived Year Owner GPL- 0031 # from compatible? (1) 0032 # 0033 # 0.9.0 thru 1.2 1991-1995 CWI yes 0034 # 1.3 thru 1.5.2 1.2 1995-1999 CNRI yes 0035 # 1.6 1.5.2 2000 CNRI no 0036 # 2.0 1.6 2000 BeOpen.com no 0037 # 1.6.1 1.6 2001 CNRI yes (2) 0038 # 2.1 2.0+1.6.1 2001 PSF no 0039 # 2.0.1 2.0+1.6.1 2001 PSF yes 0040 # 2.1.1 2.1+2.0.1 2001 PSF yes 0041 # 2.2 2.1.1 2001 PSF yes 0042 # 2.1.2 2.1.1 2002 PSF yes 0043 # 2.1.3 2.1.2 2002 PSF yes 0044 # 2.2.1 2.2 2002 PSF yes 0045 # 2.2.2 2.2.1 2002 PSF yes 0046 # 2.2.3 2.2.2 2003 PSF yes 0047 # 2.3 2.2.2 2002-2003 PSF yes 0048 # 2.3.1 2.3 2002-2003 PSF yes 0049 # 2.3.2 2.3.1 2002-2003 PSF yes 0050 # 2.3.3 2.3.2 2002-2003 PSF yes 0051 # 2.3.4 2.3.3 2004 PSF yes 0052 # 2.3.5 2.3.4 2005 PSF yes 0053 # 2.4 2.3 2004 PSF yes 0054 # 2.4.1 2.4 2005 PSF yes 0055 # 2.4.2 2.4.1 2005 PSF yes 0056 # 2.4.3 2.4.2 2006 PSF yes 0057 # 2.4.4 2.4.3 2006 PSF yes 0058 # 2.5 2.4 2006 PSF yes 0059 # 2.5.1 2.5 2007 PSF yes 0060 # 2.5.2 2.5.1 2008 PSF yes 0061 # 2.5.3 2.5.2 2008 PSF yes 0062 # 2.6 2.5 2008 PSF yes 0063 # 2.6.1 2.6 2008 PSF yes 0064 # 2.6.2 2.6.1 2009 PSF yes 0065 # 2.6.3 2.6.2 2009 PSF yes 0066 # 2.6.4 2.6.3 2009 PSF yes 0067 # 2.6.5 2.6.4 2010 PSF yes 0068 # 2.7 2.6 2010 PSF yes 0069 # 0070 # Footnotes: 0071 # 0072 # (1) GPL-compatible doesn't mean that we're distributing Python under 0073 # the GPL. All Python licenses, unlike the GPL, let you distribute 0074 # a modified version without making your changes open source. The 0075 # GPL-compatible licenses make it possible to combine Python with 0076 # other software that is released under the GPL; the others don't. 0077 # 0078 # (2) According to Richard Stallman, 1.6.1 is not GPL-compatible, 0079 # because its license has a choice of law clause. According to 0080 # CNRI, however, Stallman's lawyer has told CNRI's lawyer that 1.6.1 0081 # is "not incompatible" with the GPL. 0082 # 0083 # Thanks to the many outside volunteers who have worked under Guido's 0084 # direction to make these releases possible. 0085 # 0086 # 0087 # B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON 0088 # =============================================================== 0089 # 0090 # PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 0091 # -------------------------------------------- 0092 # 0093 # 1. This LICENSE AGREEMENT is between the Python Software Foundation 0094 # ("PSF"), and the Individual or Organization ("Licensee") accessing and 0095 # otherwise using this software ("Python") in source or binary form and 0096 # its associated documentation. 0097 # 0098 # 2. Subject to the terms and conditions of this License Agreement, PSF hereby 0099 # grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, 0100 # analyze, test, perform and/or display publicly, prepare derivative works, 0101 # distribute, and otherwise use Python alone or in any derivative version, 0102 # provided, however, that PSF's License Agreement and PSF's notice of copyright, 0103 # i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 0104 # 2011, 2012, 2013 Python Software Foundation; All Rights Reserved" are retained 0105 # in Python alone or in any derivative version prepared by Licensee. 0106 # 0107 # 3. In the event Licensee prepares a derivative work that is based on 0108 # or incorporates Python or any part thereof, and wants to make 0109 # the derivative work available to others as provided herein, then 0110 # Licensee hereby agrees to include in any such work a brief summary of 0111 # the changes made to Python. 0112 # 0113 # 4. PSF is making Python available to Licensee on an "AS IS" 0114 # basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR 0115 # IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND 0116 # DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS 0117 # FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT 0118 # INFRINGE ANY THIRD PARTY RIGHTS. 0119 # 0120 # 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON 0121 # FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS 0122 # A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, 0123 # OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 0124 # 0125 # 6. This License Agreement will automatically terminate upon a material 0126 # breach of its terms and conditions. 0127 # 0128 # 7. Nothing in this License Agreement shall be deemed to create any 0129 # relationship of agency, partnership, or joint venture between PSF and 0130 # Licensee. This License Agreement does not grant permission to use PSF 0131 # trademarks or trade name in a trademark sense to endorse or promote 0132 # products or services of Licensee, or any third party. 0133 # 0134 # 8. By copying, installing or otherwise using Python, Licensee 0135 # agrees to be bound by the terms and conditions of this License 0136 # Agreement. 0137 # 0138 # 0139 # BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0 0140 # ------------------------------------------- 0141 # 0142 # BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1 0143 # 0144 # 1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an 0145 # office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the 0146 # Individual or Organization ("Licensee") accessing and otherwise using 0147 # this software in source or binary form and its associated 0148 # documentation ("the Software"). 0149 # 0150 # 2. Subject to the terms and conditions of this BeOpen Python License 0151 # Agreement, BeOpen hereby grants Licensee a non-exclusive, 0152 # royalty-free, world-wide license to reproduce, analyze, test, perform 0153 # and/or display publicly, prepare derivative works, distribute, and 0154 # otherwise use the Software alone or in any derivative version, 0155 # provided, however, that the BeOpen Python License is retained in the 0156 # Software, alone or in any derivative version prepared by Licensee. 0157 # 0158 # 3. BeOpen is making the Software available to Licensee on an "AS IS" 0159 # basis. BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR 0160 # IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND 0161 # DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS 0162 # FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT 0163 # INFRINGE ANY THIRD PARTY RIGHTS. 0164 # 0165 # 4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE 0166 # SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS 0167 # AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY 0168 # DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 0169 # 0170 # 5. This License Agreement will automatically terminate upon a material 0171 # breach of its terms and conditions. 0172 # 0173 # 6. This License Agreement shall be governed by and interpreted in all 0174 # respects by the law of the State of California, excluding conflict of 0175 # law provisions. Nothing in this License Agreement shall be deemed to 0176 # create any relationship of agency, partnership, or joint venture 0177 # between BeOpen and Licensee. This License Agreement does not grant 0178 # permission to use BeOpen trademarks or trade names in a trademark 0179 # sense to endorse or promote products or services of Licensee, or any 0180 # third party. As an exception, the "BeOpen Python" logos available at 0181 # http://www.pythonlabs.com/logos.html may be used according to the 0182 # permissions granted on that web page. 0183 # 0184 # 7. By copying, installing or otherwise using the software, Licensee 0185 # agrees to be bound by the terms and conditions of this License 0186 # Agreement. 0187 # 0188 # 0189 # CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1 0190 # --------------------------------------- 0191 # 0192 # 1. This LICENSE AGREEMENT is between the Corporation for National 0193 # Research Initiatives, having an office at 1895 Preston White Drive, 0194 # Reston, VA 20191 ("CNRI"), and the Individual or Organization 0195 # ("Licensee") accessing and otherwise using Python 1.6.1 software in 0196 # source or binary form and its associated documentation. 0197 # 0198 # 2. Subject to the terms and conditions of this License Agreement, CNRI 0199 # hereby grants Licensee a nonexclusive, royalty-free, world-wide 0200 # license to reproduce, analyze, test, perform and/or display publicly, 0201 # prepare derivative works, distribute, and otherwise use Python 1.6.1 0202 # alone or in any derivative version, provided, however, that CNRI's 0203 # License Agreement and CNRI's notice of copyright, i.e., "Copyright (c) 0204 # 1995-2001 Corporation for National Research Initiatives; All Rights 0205 # Reserved" are retained in Python 1.6.1 alone or in any derivative 0206 # version prepared by Licensee. Alternately, in lieu of CNRI's License 0207 # Agreement, Licensee may substitute the following text (omitting the 0208 # quotes): "Python 1.6.1 is made available subject to the terms and 0209 # conditions in CNRI's License Agreement. This Agreement together with 0210 # Python 1.6.1 may be located on the Internet using the following 0211 # unique, persistent identifier (known as a handle): 1895.22/1013. This 0212 # Agreement may also be obtained from a proxy server on the Internet 0213 # using the following URL: http://hdl.handle.net/1895.22/1013". 0214 # 0215 # 3. In the event Licensee prepares a derivative work that is based on 0216 # or incorporates Python 1.6.1 or any part thereof, and wants to make 0217 # the derivative work available to others as provided herein, then 0218 # Licensee hereby agrees to include in any such work a brief summary of 0219 # the changes made to Python 1.6.1. 0220 # 0221 # 4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS" 0222 # basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR 0223 # IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND 0224 # DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS 0225 # FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT 0226 # INFRINGE ANY THIRD PARTY RIGHTS. 0227 # 0228 # 5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON 0229 # 1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS 0230 # A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1, 0231 # OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 0232 # 0233 # 6. This License Agreement will automatically terminate upon a material 0234 # breach of its terms and conditions. 0235 # 0236 # 7. This License Agreement shall be governed by the federal 0237 # intellectual property law of the United States, including without 0238 # limitation the federal copyright law, and, to the extent such 0239 # U.S. federal law does not apply, by the law of the Commonwealth of 0240 # Virginia, excluding Virginia's conflict of law provisions. 0241 # Notwithstanding the foregoing, with regard to derivative works based 0242 # on Python 1.6.1 that incorporate non-separable material that was 0243 # previously distributed under the GNU General Public License (GPL), the 0244 # law of the Commonwealth of Virginia shall govern this License 0245 # Agreement only as to issues arising under or with respect to 0246 # Paragraphs 4, 5, and 7 of this License Agreement. Nothing in this 0247 # License Agreement shall be deemed to create any relationship of 0248 # agency, partnership, or joint venture between CNRI and Licensee. This 0249 # License Agreement does not grant permission to use CNRI trademarks or 0250 # trade name in a trademark sense to endorse or promote products or 0251 # services of Licensee, or any third party. 0252 # 0253 # 8. By clicking on the "ACCEPT" button where indicated, or by copying, 0254 # installing or otherwise using Python 1.6.1, Licensee agrees to be 0255 # bound by the terms and conditions of this License Agreement. 0256 # 0257 # ACCEPT 0258 # 0259 # 0260 # CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2 0261 # -------------------------------------------------- 0262 # 0263 # Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam, 0264 # The Netherlands. All rights reserved. 0265 # 0266 # Permission to use, copy, modify, and distribute this software and its 0267 # documentation for any purpose and without fee is hereby granted, 0268 # provided that the above copyright notice appear in all copies and that 0269 # both that copyright notice and this permission notice appear in 0270 # supporting documentation, and that the name of Stichting Mathematisch 0271 # Centrum or CWI not be used in advertising or publicity pertaining to 0272 # distribution of the software without specific, written prior 0273 # permission. 0274 # 0275 # STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO 0276 # THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 0277 # FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE 0278 # FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 0279 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 0280 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT 0281 # OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 0282 """Heap queue algorithm (a.k.a. priority queue). 0283 0284 Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for 0285 all k, counting elements from 0. For the sake of comparison, 0286 non-existing elements are considered to be infinite. The interesting 0287 property of a heap is that a[0] is always its smallest element. 0288 0289 Usage: 0290 0291 heap = [] # creates an empty heap 0292 heappush(heap, item) # pushes a new item on the heap 0293 item = heappop(heap) # pops the smallest item from the heap 0294 item = heap[0] # smallest item on the heap without popping it 0295 heapify(x) # transforms list into a heap, in-place, in linear time 0296 item = heapreplace(heap, item) # pops and returns smallest item, and adds 0297 # new item; the heap size is unchanged 0298 0299 Our API differs from textbook heap algorithms as follows: 0300 0301 - We use 0-based indexing. This makes the relationship between the 0302 index for a node and the indexes for its children slightly less 0303 obvious, but is more suitable since Python uses 0-based indexing. 0304 0305 - Our heappop() method returns the smallest item, not the largest. 0306 0307 These two make it possible to view the heap as a regular Python list 0308 without surprises: heap[0] is the smallest item, and heap.sort() 0309 maintains the heap invariant! 0310 """ 0311 0312 # Original code by Kevin O'Connor, augmented by Tim Peters and Raymond Hettinger 0313 0314 __about__ = """Heap queues 0315 0316 [explanation by François Pinard] 0317 0318 Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for 0319 all k, counting elements from 0. For the sake of comparison, 0320 non-existing elements are considered to be infinite. The interesting 0321 property of a heap is that a[0] is always its smallest element. 0322 0323 The strange invariant above is meant to be an efficient memory 0324 representation for a tournament. The numbers below are `k', not a[k]: 0325 0326 0 0327 0328 1 2 0329 0330 3 4 5 6 0331 0332 7 8 9 10 11 12 13 14 0333 0334 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 0335 0336 0337 In the tree above, each cell `k' is topping `2*k+1' and `2*k+2'. In 0338 an usual binary tournament we see in sports, each cell is the winner 0339 over the two cells it tops, and we can trace the winner down the tree 0340 to see all opponents s/he had. However, in many computer applications 0341 of such tournaments, we do not need to trace the history of a winner. 0342 To be more memory efficient, when a winner is promoted, we try to 0343 replace it by something else at a lower level, and the rule becomes 0344 that a cell and the two cells it tops contain three different items, 0345 but the top cell "wins" over the two topped cells. 0346 0347 If this heap invariant is protected at all time, index 0 is clearly 0348 the overall winner. The simplest algorithmic way to remove it and 0349 find the "next" winner is to move some loser (let's say cell 30 in the 0350 diagram above) into the 0 position, and then percolate this new 0 down 0351 the tree, exchanging values, until the invariant is re-established. 0352 This is clearly logarithmic on the total number of items in the tree. 0353 By iterating over all items, you get an O(n ln n) sort. 0354 0355 A nice feature of this sort is that you can efficiently insert new 0356 items while the sort is going on, provided that the inserted items are 0357 not "better" than the last 0'th element you extracted. This is 0358 especially useful in simulation contexts, where the tree holds all 0359 incoming events, and the "win" condition means the smallest scheduled 0360 time. When an event schedule other events for execution, they are 0361 scheduled into the future, so they can easily go into the heap. So, a 0362 heap is a good structure for implementing schedulers (this is what I 0363 used for my MIDI sequencer :-). 0364 0365 Various structures for implementing schedulers have been extensively 0366 studied, and heaps are good for this, as they are reasonably speedy, 0367 the speed is almost constant, and the worst case is not much different 0368 than the average case. However, there are other representations which 0369 are more efficient overall, yet the worst cases might be terrible. 0370 0371 Heaps are also very useful in big disk sorts. You most probably all 0372 know that a big sort implies producing "runs" (which are pre-sorted 0373 sequences, which size is usually related to the amount of CPU memory), 0374 followed by a merging passes for these runs, which merging is often 0375 very cleverly organised[1]. It is very important that the initial 0376 sort produces the longest runs possible. Tournaments are a good way 0377 to that. If, using all the memory available to hold a tournament, you 0378 replace and percolate items that happen to fit the current run, you'll 0379 produce runs which are twice the size of the memory for random input, 0380 and much better for input fuzzily ordered. 0381 0382 Moreover, if you output the 0'th item on disk and get an input which 0383 may not fit in the current tournament (because the value "wins" over 0384 the last output value), it cannot fit in the heap, so the size of the 0385 heap decreases. The freed memory could be cleverly reused immediately 0386 for progressively building a second heap, which grows at exactly the 0387 same rate the first heap is melting. When the first heap completely 0388 vanishes, you switch heaps and start a new run. Clever and quite 0389 effective! 0390 0391 In a word, heaps are useful memory structures to know. I use them in 0392 a few applications, and I think it is good to keep a `heap' module 0393 around. :-) 0394 0395 -------------------- 0396 [1] The disk balancing algorithms which are current, nowadays, are 0397 more annoying than clever, and this is a consequence of the seeking 0398 capabilities of the disks. On devices which cannot seek, like big 0399 tape drives, the story was quite different, and one had to be very 0400 clever to ensure (far in advance) that each tape movement will be the 0401 most effective possible (that is, will best participate at 0402 "progressing" the merge). Some tapes were even able to read 0403 backwards, and this was also used to avoid the rewinding time. 0404 Believe me, real good tape sorts were quite spectacular to watch! 0405 From all times, sorting has always been a Great Art! :-) 0406 """ 0407 0408 __all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'merge', 0409 'nlargest', 'nsmallest', 'heappushpop'] 0410 0411 def heappush(heap, item): 0412 """Push item onto heap, maintaining the heap invariant.""" 0413 heap.append(item) 0414 _siftdown(heap, 0, len(heap)-1) 0415 0416 def heappop(heap): 0417 """Pop the smallest item off the heap, maintaining the heap invariant.""" 0418 lastelt = heap.pop() # raises appropriate IndexError if heap is empty 0419 if heap: 0420 returnitem = heap[0] 0421 heap[0] = lastelt 0422 _siftup(heap, 0) 0423 return returnitem 0424 return lastelt 0425 0426 def heapreplace(heap, item): 0427 """Pop and return the current smallest value, and add the new item. 0428 0429 This is more efficient than heappop() followed by heappush(), and can be 0430 more appropriate when using a fixed-size heap. Note that the value 0431 returned may be larger than item! That constrains reasonable uses of 0432 this routine unless written as part of a conditional replacement: 0433 0434 if item > heap[0]: 0435 item = heapreplace(heap, item) 0436 """ 0437 returnitem = heap[0] # raises appropriate IndexError if heap is empty 0438 heap[0] = item 0439 _siftup(heap, 0) 0440 return returnitem 0441 0442 def heappushpop(heap, item): 0443 """Fast version of a heappush followed by a heappop.""" 0444 if heap and heap[0] < item: 0445 item, heap[0] = heap[0], item 0446 _siftup(heap, 0) 0447 return item 0448 0449 def heapify(x): 0450 """Transform list into a heap, in-place, in O(len(x)) time.""" 0451 n = len(x) 0452 # Transform bottom-up. The largest index there's any point to looking at 0453 # is the largest with a child index in-range, so must have 2*i + 1 < n, 0454 # or i < (n-1)/2. If n is even = 2*j, this is (2*j-1)/2 = j-1/2 so 0455 # j-1 is the largest, which is n//2 - 1. If n is odd = 2*j+1, this is 0456 # (2*j+1-1)/2 = j so j-1 is the largest, and that's again n//2-1. 0457 for i in reversed(range(n//2)): 0458 _siftup(x, i) 0459 0460 def _heappop_max(heap): 0461 """Maxheap version of a heappop.""" 0462 lastelt = heap.pop() # raises appropriate IndexError if heap is empty 0463 if heap: 0464 returnitem = heap[0] 0465 heap[0] = lastelt 0466 _siftup_max(heap, 0) 0467 return returnitem 0468 return lastelt 0469 0470 def _heapreplace_max(heap, item): 0471 """Maxheap version of a heappop followed by a heappush.""" 0472 returnitem = heap[0] # raises appropriate IndexError if heap is empty 0473 heap[0] = item 0474 _siftup_max(heap, 0) 0475 return returnitem 0476 0477 def _heapify_max(x): 0478 """Transform list into a maxheap, in-place, in O(len(x)) time.""" 0479 n = len(x) 0480 for i in reversed(range(n//2)): 0481 _siftup_max(x, i) 0482 0483 # 'heap' is a heap at all indices >= startpos, except possibly for pos. pos 0484 # is the index of a leaf with a possibly out-of-order value. Restore the 0485 # heap invariant. 0486 def _siftdown(heap, startpos, pos): 0487 newitem = heap[pos] 0488 # Follow the path to the root, moving parents down until finding a place 0489 # newitem fits. 0490 while pos > startpos: 0491 parentpos = (pos - 1) >> 1 0492 parent = heap[parentpos] 0493 if newitem < parent: 0494 heap[pos] = parent 0495 pos = parentpos 0496 continue 0497 break 0498 heap[pos] = newitem 0499 0500 # The child indices of heap index pos are already heaps, and we want to make 0501 # a heap at index pos too. We do this by bubbling the smaller child of 0502 # pos up (and so on with that child's children, etc) until hitting a leaf, 0503 # then using _siftdown to move the oddball originally at index pos into place. 0504 # 0505 # We *could* break out of the loop as soon as we find a pos where newitem <= 0506 # both its children, but turns out that's not a good idea, and despite that 0507 # many books write the algorithm that way. During a heap pop, the last array 0508 # element is sifted in, and that tends to be large, so that comparing it 0509 # against values starting from the root usually doesn't pay (= usually doesn't 0510 # get us out of the loop early). See Knuth, Volume 3, where this is 0511 # explained and quantified in an exercise. 0512 # 0513 # Cutting the # of comparisons is important, since these routines have no 0514 # way to extract "the priority" from an array element, so that intelligence 0515 # is likely to be hiding in custom comparison methods, or in array elements 0516 # storing (priority, record) tuples. Comparisons are thus potentially 0517 # expensive. 0518 # 0519 # On random arrays of length 1000, making this change cut the number of 0520 # comparisons made by heapify() a little, and those made by exhaustive 0521 # heappop() a lot, in accord with theory. Here are typical results from 3 0522 # runs (3 just to demonstrate how small the variance is): 0523 # 0524 # Compares needed by heapify Compares needed by 1000 heappops 0525 # -------------------------- -------------------------------- 0526 # 1837 cut to 1663 14996 cut to 8680 0527 # 1855 cut to 1659 14966 cut to 8678 0528 # 1847 cut to 1660 15024 cut to 8703 0529 # 0530 # Building the heap by using heappush() 1000 times instead required 0531 # 2198, 2148, and 2219 compares: heapify() is more efficient, when 0532 # you can use it. 0533 # 0534 # The total compares needed by list.sort() on the same lists were 8627, 0535 # 8627, and 8632 (this should be compared to the sum of heapify() and 0536 # heappop() compares): list.sort() is (unsurprisingly!) more efficient 0537 # for sorting. 0538 0539 def _siftup(heap, pos): 0540 endpos = len(heap) 0541 startpos = pos 0542 newitem = heap[pos] 0543 # Bubble up the smaller child until hitting a leaf. 0544 childpos = 2*pos + 1 # leftmost child position 0545 while childpos < endpos: 0546 # Set childpos to index of smaller child. 0547 rightpos = childpos + 1 0548 if rightpos < endpos and not heap[childpos] < heap[rightpos]: 0549 childpos = rightpos 0550 # Move the smaller child up. 0551 heap[pos] = heap[childpos] 0552 pos = childpos 0553 childpos = 2*pos + 1 0554 # The leaf at pos is empty now. Put newitem there, and bubble it up 0555 # to its final resting place (by sifting its parents down). 0556 heap[pos] = newitem 0557 _siftdown(heap, startpos, pos) 0558 0559 def _siftdown_max(heap, startpos, pos): 0560 'Maxheap variant of _siftdown' 0561 newitem = heap[pos] 0562 # Follow the path to the root, moving parents down until finding a place 0563 # newitem fits. 0564 while pos > startpos: 0565 parentpos = (pos - 1) >> 1 0566 parent = heap[parentpos] 0567 if parent < newitem: 0568 heap[pos] = parent 0569 pos = parentpos 0570 continue 0571 break 0572 heap[pos] = newitem 0573 0574 def _siftup_max(heap, pos): 0575 'Maxheap variant of _siftup' 0576 endpos = len(heap) 0577 startpos = pos 0578 newitem = heap[pos] 0579 # Bubble up the larger child until hitting a leaf. 0580 childpos = 2*pos + 1 # leftmost child position 0581 while childpos < endpos: 0582 # Set childpos to index of larger child. 0583 rightpos = childpos + 1 0584 if rightpos < endpos and not heap[rightpos] < heap[childpos]: 0585 childpos = rightpos 0586 # Move the larger child up. 0587 heap[pos] = heap[childpos] 0588 pos = childpos 0589 childpos = 2*pos + 1 0590 # The leaf at pos is empty now. Put newitem there, and bubble it up 0591 # to its final resting place (by sifting its parents down). 0592 heap[pos] = newitem 0593 _siftdown_max(heap, startpos, pos) 0594 0595 def merge(iterables, key=None, reverse=False): 0596 '''Merge multiple sorted inputs into a single sorted output. 0597 0598 Similar to sorted(itertools.chain(*iterables)) but returns a generator, 0599 does not pull the data into memory all at once, and assumes that each of 0600 the input streams is already sorted (smallest to largest). 0601 0602 >>> list(merge([1,3,5,7], [0,2,4,8], [5,10,15,20], [], [25])) 0603 [0, 1, 2, 3, 4, 5, 5, 7, 8, 10, 15, 20, 25] 0604 0605 If *key* is not None, applies a key function to each element to determine 0606 its sort order. 0607 0608 >>> list(merge(['dog', 'horse'], ['cat', 'fish', 'kangaroo'], key=len)) 0609 ['dog', 'cat', 'fish', 'horse', 'kangaroo'] 0610 0611 ''' 0612 0613 h = [] 0614 h_append = h.append 0615 0616 if reverse: 0617 _heapify = _heapify_max 0618 _heappop = _heappop_max 0619 _heapreplace = _heapreplace_max 0620 direction = -1 0621 else: 0622 _heapify = heapify 0623 _heappop = heappop 0624 _heapreplace = heapreplace 0625 direction = 1 0626 0627 if key is None: 0628 for order, it in enumerate(map(iter, iterables)): 0629 try: 0630 h_append([next(it), order * direction, it]) 0631 except StopIteration: 0632 pass 0633 _heapify(h) 0634 while len(h) > 1: 0635 try: 0636 while True: 0637 value, order, it = s = h[0] 0638 yield value 0639 s[0] = next(it) # raises StopIteration when exhausted 0640 _heapreplace(h, s) # restore heap condition 0641 except StopIteration: 0642 _heappop(h) # remove empty iterator 0643 if h: 0644 # fast case when only a single iterator remains 0645 value, order, it = h[0] 0646 yield value 0647 for value in it: 0648 yield value 0649 return 0650 0651 for order, it in enumerate(map(iter, iterables)): 0652 try: 0653 value = next(it) 0654 h_append([key(value), order * direction, value, it]) 0655 except StopIteration: 0656 pass 0657 _heapify(h) 0658 while len(h) > 1: 0659 try: 0660 while True: 0661 key_value, order, value, it = s = h[0] 0662 yield value 0663 value = next(it) 0664 s[0] = key(value) 0665 s[2] = value 0666 _heapreplace(h, s) 0667 except StopIteration: 0668 _heappop(h) 0669 if h: 0670 key_value, order, value, it = h[0] 0671 yield value 0672 for value in it: 0673 yield value 0674 0675 0676 # Algorithm notes for nlargest() and nsmallest() 0677 # ============================================== 0678 # 0679 # Make a single pass over the data while keeping the k most extreme values 0680 # in a heap. Memory consumption is limited to keeping k values in a list. 0681 # 0682 # Measured performance for random inputs: 0683 # 0684 # number of comparisons 0685 # n inputs k-extreme values (average of 5 trials) % more than min() 0686 # ------------- ---------------- --------------------- ----------------- 0687 # 1,000 100 3,317 231.7% 0688 # 10,000 100 14,046 40.5% 0689 # 100,000 100 105,749 5.7% 0690 # 1,000,000 100 1,007,751 0.8% 0691 # 10,000,000 100 10,009,401 0.1% 0692 # 0693 # Theoretical number of comparisons for k smallest of n random inputs: 0694 # 0695 # Step Comparisons Action 0696 # ---- -------------------------- --------------------------- 0697 # 1 1.66 * k heapify the first k-inputs 0698 # 2 n - k compare remaining elements to top of heap 0699 # 3 k * (1 + lg2(k)) * ln(n/k) replace the topmost value on the heap 0700 # 4 k * lg2(k) - (k/2) final sort of the k most extreme values 0701 # 0702 # Combining and simplifying for a rough estimate gives: 0703 # 0704 # comparisons = n + k * (log(k, 2) * log(n/k) + log(k, 2) + log(n/k)) 0705 # 0706 # Computing the number of comparisons for step 3: 0707 # ----------------------------------------------- 0708 # * For the i-th new value from the iterable, the probability of being in the 0709 # k most extreme values is k/i. For example, the probability of the 101st 0710 # value seen being in the 100 most extreme values is 100/101. 0711 # * If the value is a new extreme value, the cost of inserting it into the 0712 # heap is 1 + log(k, 2). 0713 # * The probability times the cost gives: 0714 # (k/i) * (1 + log(k, 2)) 0715 # * Summing across the remaining n-k elements gives: 0716 # sum((k/i) * (1 + log(k, 2)) for i in range(k+1, n+1)) 0717 # * This reduces to: 0718 # (H(n) - H(k)) * k * (1 + log(k, 2)) 0719 # * Where H(n) is the n-th harmonic number estimated by: 0720 # gamma = 0.5772156649 0721 # H(n) = log(n, e) + gamma + 1 / (2 * n) 0722 # http://en.wikipedia.org/wiki/Harmonic_series_(mathematics)#Rate_of_divergence 0723 # * Substituting the H(n) formula: 0724 # comparisons = k * (1 + log(k, 2)) * (log(n/k, e) + (1/n - 1/k) / 2) 0725 # 0726 # Worst-case for step 3: 0727 # ---------------------- 0728 # In the worst case, the input data is reversed sorted so that every new element 0729 # must be inserted in the heap: 0730 # 0731 # comparisons = 1.66 * k + log(k, 2) * (n - k) 0732 # 0733 # Alternative Algorithms 0734 # ---------------------- 0735 # Other algorithms were not used because they: 0736 # 1) Took much more auxiliary memory, 0737 # 2) Made multiple passes over the data. 0738 # 3) Made more comparisons in common cases (small k, large n, semi-random input). 0739 # See the more detailed comparison of approach at: 0740 # http://code.activestate.com/recipes/577573-compare-algorithms-for-heapqsmallest 0741 0742 def nsmallest(n, iterable, key=None): 0743 """Find the n smallest elements in a dataset. 0744 0745 Equivalent to: sorted(iterable, key=key)[:n] 0746 """ 0747 0748 # Short-cut for n==1 is to use min() 0749 if n == 1: 0750 it = iter(iterable) 0751 sentinel = object() 0752 if key is None: 0753 result = min(it, default=sentinel) 0754 else: 0755 result = min(it, default=sentinel, key=key) 0756 return [] if result is sentinel else [result] 0757 0758 # When n>=size, it's faster to use sorted() 0759 try: 0760 size = len(iterable) 0761 except (TypeError, AttributeError): 0762 pass 0763 else: 0764 if n >= size: 0765 return sorted(iterable, key=key)[:n] 0766 0767 # When key is none, use simpler decoration 0768 if key is None: 0769 it = iter(iterable) 0770 # put the range(n) first so that zip() doesn't 0771 # consume one too many elements from the iterator 0772 result = [(elem, i) for i, elem in zip(range(n), it)] 0773 if not result: 0774 return result 0775 _heapify_max(result) 0776 top = result[0][0] 0777 order = n 0778 _heapreplace = _heapreplace_max 0779 for elem in it: 0780 if elem < top: 0781 _heapreplace(result, (elem, order)) 0782 top = result[0][0] 0783 order += 1 0784 result.sort() 0785 return [r[0] for r in result] 0786 0787 # General case, slowest method 0788 it = iter(iterable) 0789 result = [(key(elem), i, elem) for i, elem in zip(range(n), it)] 0790 if not result: 0791 return result 0792 _heapify_max(result) 0793 top = result[0][0] 0794 order = n 0795 _heapreplace = _heapreplace_max 0796 for elem in it: 0797 k = key(elem) 0798 if k < top: 0799 _heapreplace(result, (k, order, elem)) 0800 top = result[0][0] 0801 order += 1 0802 result.sort() 0803 return [r[2] for r in result] 0804 0805 def nlargest(n, iterable, key=None): 0806 """Find the n largest elements in a dataset. 0807 0808 Equivalent to: sorted(iterable, key=key, reverse=True)[:n] 0809 """ 0810 0811 # Short-cut for n==1 is to use max() 0812 if n == 1: 0813 it = iter(iterable) 0814 sentinel = object() 0815 if key is None: 0816 result = max(it, default=sentinel) 0817 else: 0818 result = max(it, default=sentinel, key=key) 0819 return [] if result is sentinel else [result] 0820 0821 # When n>=size, it's faster to use sorted() 0822 try: 0823 size = len(iterable) 0824 except (TypeError, AttributeError): 0825 pass 0826 else: 0827 if n >= size: 0828 return sorted(iterable, key=key, reverse=True)[:n] 0829 0830 # When key is none, use simpler decoration 0831 if key is None: 0832 it = iter(iterable) 0833 result = [(elem, i) for i, elem in zip(range(0, -n, -1), it)] 0834 if not result: 0835 return result 0836 heapify(result) 0837 top = result[0][0] 0838 order = -n 0839 _heapreplace = heapreplace 0840 for elem in it: 0841 if top < elem: 0842 _heapreplace(result, (elem, order)) 0843 top = result[0][0] 0844 order -= 1 0845 result.sort(reverse=True) 0846 return [r[0] for r in result] 0847 0848 # General case, slowest method 0849 it = iter(iterable) 0850 result = [(key(elem), i, elem) for i, elem in zip(range(0, -n, -1), it)] 0851 if not result: 0852 return result 0853 heapify(result) 0854 top = result[0][0] 0855 order = -n 0856 _heapreplace = heapreplace 0857 for elem in it: 0858 k = key(elem) 0859 if top < k: 0860 _heapreplace(result, (k, order, elem)) 0861 top = result[0][0] 0862 order -= 1 0863 result.sort(reverse=True) 0864 return [r[2] for r in result] 0865 0866 # If available, use C implementation 0867 try: 0868 from _heapq import * 0869 except ImportError: 0870 pass 0871 try: 0872 from _heapq import _heapreplace_max 0873 except ImportError: 0874 pass 0875 try: 0876 from _heapq import _heapify_max 0877 except ImportError: 0878 pass 0879 try: 0880 from _heapq import _heappop_max 0881 except ImportError: 0882 pass 0883 0884 0885 if __name__ == "__main__": 0886 import doctest 0887 import sys 0888 (failure_count, test_count) = doctest.testmod() 0889 if failure_count: 0890 sys.exit(-1)
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.1.0 LXR engine. The LXR team |