0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
# Public API of this module: the only name exported by a star-import.
__all__ = ["StorageLevel"]
0019
0020
class StorageLevel(object):

    """
    Flags for controlling the storage of an RDD. Each StorageLevel records whether to use memory,
    whether to drop the RDD to disk if it falls out of memory, whether to keep the data in memory
    in a JAVA-specific serialized format, and whether to replicate the RDD partitions on multiple
    nodes. Also contains static constants for some commonly used storage levels, such as
    MEMORY_ONLY. Since the data is always serialized on the Python side, all the constants use
    the serialized formats.

    Two StorageLevel objects compare equal when all five of their flags are equal, and equal
    objects share the same hash.
    """

    def __init__(self, useDisk, useMemory, useOffHeap, deserialized, replication=1):
        """
        Store the given flags unchanged as same-named attributes.

        :param useDisk: whether the level uses disk.
        :param useMemory: whether the level uses memory.
        :param useOffHeap: whether the level uses off-heap storage.
        :param deserialized: whether the data is kept deserialized (as opposed to serialized).
        :param replication: replication factor reported by ``__str__`` as "<n>x Replicated";
            defaults to 1.
        """
        self.useDisk = useDisk
        self.useMemory = useMemory
        self.useOffHeap = useOffHeap
        self.deserialized = deserialized
        self.replication = replication

    def _key(self):
        # Single source of truth for equality and hashing so the two cannot drift apart.
        return (self.useDisk, self.useMemory, self.useOffHeap, self.deserialized,
                self.replication)

    def __eq__(self, other):
        """Return True when ``other`` is a StorageLevel with identical flags."""
        if not isinstance(other, StorageLevel):
            # Let Python try the reflected comparison / fall back to identity.
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        """Inverse of ``__eq__``; spelled out because Python 2 does not derive it."""
        outcome = self.__eq__(other)
        if outcome is NotImplemented:
            return outcome
        return not outcome

    def __hash__(self):
        """
        Hash consistently with ``__eq__`` so instances remain usable in sets and as dict keys.

        The attributes are mutable; do not modify an instance while it is stored in a
        hash-based container.
        """
        return hash(self._key())

    def __repr__(self):
        """Return a constructor-style representation listing all five flags in order."""
        return "StorageLevel(%s, %s, %s, %s, %s)" % (
            self.useDisk, self.useMemory, self.useOffHeap, self.deserialized, self.replication)

    def __str__(self):
        """
        Return a human-readable summary, e.g. "Disk Memory Serialized 1x Replicated".

        Only the enabled storage kinds are listed; the serialization state and the
        replication factor are always present.
        """
        parts = []
        if self.useDisk:
            parts.append("Disk")
        if self.useMemory:
            parts.append("Memory")
        if self.useOffHeap:
            parts.append("OffHeap")
        parts.append("Deserialized" if self.deserialized else "Serialized")
        parts.append("%sx Replicated" % self.replication)
        return " ".join(parts)
0051
# Predefined storage levels, attached to the class as attributes.
# Every level below passes deserialized=False; the "_2" variants use replication=2.
StorageLevel.DISK_ONLY = StorageLevel(
    useDisk=True, useMemory=False, useOffHeap=False, deserialized=False)
StorageLevel.DISK_ONLY_2 = StorageLevel(
    useDisk=True, useMemory=False, useOffHeap=False, deserialized=False, replication=2)
StorageLevel.MEMORY_ONLY = StorageLevel(
    useDisk=False, useMemory=True, useOffHeap=False, deserialized=False)
StorageLevel.MEMORY_ONLY_2 = StorageLevel(
    useDisk=False, useMemory=True, useOffHeap=False, deserialized=False, replication=2)
StorageLevel.MEMORY_AND_DISK = StorageLevel(
    useDisk=True, useMemory=True, useOffHeap=False, deserialized=False)
StorageLevel.MEMORY_AND_DISK_2 = StorageLevel(
    useDisk=True, useMemory=True, useOffHeap=False, deserialized=False, replication=2)
# OFF_HEAP enables disk, memory and off-heap together, with a single replica.
StorageLevel.OFF_HEAP = StorageLevel(
    useDisk=True, useMemory=True, useOffHeap=True, deserialized=False, replication=1)