From 0e4399b053093a68568cb7816092b2a6f6208995 Mon Sep 17 00:00:00 2001
From: David O'Shea <dcoshea@hotmail.com>
Date: Sat, 29 Sep 2012 23:19:08 +0930
Subject: [PATCH 1/4] Use UTF-8 for decoding of sign text from world

When using the dumpSigns command, if non-ASCII characters were present
in the sign text, a UnicodeDecodeError exception would occur when
concatenating the text from the sign with u"\n", because Python 2
implicitly decodes a byte string as ASCII before joining it to a
unicode string.

This fix decodes the sign text as UTF-8, which appears to be
consistent with the behaviour of Minecraft 1.2.5 for the following
sign text as dumped in Python using repr():
'\xc2\xa3$\xc2\xa3$\xc2\xa3$\xc2\xa3$\xc2\xa3$\xc2\xa3$\xc2\xa3$\xc2\xa3'

Additionally, this fix ensures the output file starts with a UTF-8
signature (byte order mark), which makes it easier for tools to work
out that the file contains UTF-8.
---
 mce.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/mce.py b/mce.py
index d95dc8b..e7bafb6 100644
--- a/mce.py
+++ b/mce.py
@@ -604,7 +604,15 @@ class mce(object):
         else:
             filename = self.level.displayName + ".signs"
 
-        outFile = codecs.open(filename, "w", encoding='utf-8')
+        # It appears that Minecraft interprets the sign text as UTF-8,
+        # so we should decode it as such too.
+        decodeSignText = codecs.getdecoder('utf-8')
+        # We happen to encode the output file in UTF-8 too, although
+        # we could use another UTF encoding.  The '-sig' encoding puts
+        # a signature at the start of the output file that tools such
+        # as Microsoft Windows Notepad and Emacs understand to mean
+        # the file has UTF-8 encoding.
+        outFile = codecs.open(filename, "w", encoding='utf-8-sig')
 
         print "Dumping signs..."
         signCount = 0
@@ -621,7 +629,8 @@ class mce(object):
 
                     outFile.write(str(map(lambda x: tileEntity[x].value, "xyz")) + "\n")
                     for i in range(4):
-                        outFile.write(tileEntity["Text{0}".format(i + 1)].value + u"\n")
+                        signText = tileEntity["Text{0}".format(i + 1)].value
+                        outFile.write(decodeSignText(signText)[0] + u"\n")
 
             if i % 100 == 0:
                 print "Chunk {0}...".format(i)

From 96c155909e317d13a13d4b7e1f08efa651a9b60c Mon Sep 17 00:00:00 2001
From: David O'Shea <dcoshea@hotmail.com>
Date: Sat, 29 Sep 2012 23:24:56 +0930
Subject: [PATCH 2/4] Correct references to ChunkMalformed exception in
 dumpSigns/dumpChests

The exception handlers in the dumpSigns and dumpChests commands
referenced ChunkMalformed in the wrong package.

This fix corrects the references, but note that this by itself is not
desirable, as ChunkMalformed is raised when a MemoryError occurs, and
the exception handlers ignore the exception.  As a MemoryError is not
likely to go away by itself, this can result in a flood of error
messages being printed, instead of the existing behaviour where
execution of the command terminates due to the incorrect reference to
ChunkMalformed.
---
 mce.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/mce.py b/mce.py
index e7bafb6..0ab70e7 100644
--- a/mce.py
+++ b/mce.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+import mclevelbase
 import mclevel
 import infiniteworld
 import sys
@@ -620,7 +621,7 @@ class mce(object):
         for i, cPos in enumerate(self.level.allChunks):
             try:
                 chunk = self.level.getChunk(*cPos)
-            except mclevel.ChunkMalformed:
+            except mclevelbase.ChunkMalformed:
                 continue
 
             for tileEntity in chunk.TileEntities:
@@ -779,7 +780,7 @@ class mce(object):
         for i, cPos in enumerate(self.level.allChunks):
             try:
                 chunk = self.level.getChunk(*cPos)
-            except mclevel.ChunkMalformed:
+            except mclevelbase.ChunkMalformed:
                 continue
 
             for tileEntity in chunk.TileEntities:

From 3f76761ec59c3fe3521b92db9586a49fb434f529 Mon Sep 17 00:00:00 2001
From: David O'Shea <dcoshea@hotmail.com>
Date: Mon, 1 Oct 2012 08:52:46 +0930
Subject: [PATCH 3/4] Don't turn a MemoryError into a ChunkMalformed exception

In a number of places, all exceptions were caught and a ChunkMalformed
exception raised in their place.  Some code, such as dumpSigns,
continues after a ChunkMalformed exception.  In the case where the
ChunkMalformed exception was a result of a MemoryError, this will most
likely result in a flood of error messages because the MemoryError is
not likely to go away by itself.

This fix causes MemoryError exceptions to not be transformed into
ChunkMalformed exceptions, and hopefully covers all cases where this
was done.  Now, when memory is exhausted, the only detail that is
output is "Exception during command: MemoryError()", rather than the
previous, more detailed output, such as:

"""
ERROR:Malformed NBT data in file: r.4.0.mca index 3776 sector 552 length 1 format deflate ()
ERROR:Incorrect chunk format in file: r.4.0.mca index 3776 sector 552 length 1 format deflate (((MemoryError(),), <traceback object at 0x1C927B98>))
"""

but the particular data that was being processed at the time of the
MemoryError is not likely to be useful information anyway.
---
 infiniteworld.py | 6 ++++++
 schematic.py     | 2 ++
 2 files changed, 8 insertions(+)

diff --git a/infiniteworld.py b/infiniteworld.py
index b2ceed9..fdaf646 100644
--- a/infiniteworld.py
+++ b/infiniteworld.py
@@ -799,6 +799,8 @@ class InfdevChunk(LightedChunk):
             try:
                 self._decompressChunk()
 
+            except MemoryError:
+                raise
             except Exception, e:
                 error(u"Malformed NBT data in file: {0} ({1})".format(self.filename, e))
                 if self.world:
@@ -884,6 +886,8 @@ class InfdevChunk(LightedChunk):
                 self.dataIsPacked = True
                 self.decompress()
 
+            except MemoryError:
+                raise
             except Exception, e:
                 error(u"Incorrect chunk format in file: {0} ({1})".format(self.filename, e))
                 if self.world:
@@ -2731,6 +2735,8 @@ class MCInfdevOldLevel(ChunkedLevelMixin, EntityLevel):
                     data = nbt.gunzip(cdata)
                     chunk.root_tag = nbt.load(buf=data)
 
+        except MemoryError:
+            raise
         except Exception, e:
             raise ChunkMalformed("Chunk {0} had an error: {1!r}".format(chunk.chunkPosition, e), sys.exc_info()[2])
 
diff --git a/schematic.py b/schematic.py
index f91e6fd..9a905f6 100644
--- a/schematic.py
+++ b/schematic.py
@@ -132,6 +132,8 @@ class MCSchematic (EntityLevel):
 
         try:
             self.root_tag = nbt.load(buf=data)
+        except MemoryError:  # not a malformed file; let it propagate as-is
+            raise
         except Exception, e:
             error(u"Malformed NBT data in schematic file: {0} ({1})".format(self.filename, e))
             raise ChunkMalformed((e, self.filename), sys.exc_info()[2])

From 4378e6af26977a4b9fb87648709037d8d7b2c8e0 Mon Sep 17 00:00:00 2001
From: David O'Shea <dcoshea@hotmail.com>
Date: Mon, 1 Oct 2012 14:43:29 +0930
Subject: [PATCH 4/4] Fix memory leak for Anvil chunks

When compressing a chunk, the uncompressed tag structure in
self.root_tag is thrown away, but an AnvilChunk stores uncompressed
data in four other member variables (_Blocks, _Data, _BlockLight and
_SkyLight) too.

This fix factors out the discarding of self.root_tag into a new method
_discardUncompressed(), and overrides this method in AnvilChunk to
also throw away the Anvil-specific members.
---
 infiniteworld.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/infiniteworld.py b/infiniteworld.py
index fdaf646..67f9add 100644
--- a/infiniteworld.py
+++ b/infiniteworld.py
@@ -720,6 +720,9 @@ class InfdevChunk(LightedChunk):
         root_tag.save(buf=buf)
         return deflate(buf.getvalue())
 
+    def _discardUncompressed(self):  # hook: subclasses extend to drop more
+        self.root_tag = None  # throw away the uncompressed tag structure
+
     def _compressChunk(self):
         root_tag = self.root_tag
         if root_tag is None:
@@ -730,7 +733,7 @@ class InfdevChunk(LightedChunk):
         if self.compressMode == MCRegionFile.VERSION_DEFLATE:
             self.compressedTag = self.compressTagDeflate(root_tag)
 
-        self.root_tag = None
+        self._discardUncompressed()
 
     def decompressTagGzip(self, data):
         return nbt.load(buf=nbt.gunzip(data))
@@ -774,7 +777,7 @@ class InfdevChunk(LightedChunk):
         if not self.dirty:
             # if we are not dirty, just throw the
             # uncompressed tag structure away. rely on the OS disk cache.
-            self.root_tag = None
+            self._discardUncompressed()
         else:
             if self.root_tag is not None:
                 self.sanitizeBlocks()  # xxx
@@ -1113,6 +1116,14 @@ class AnvilChunk(InfdevChunk):
 
                 arr[..., y:y + 16] = secarray.swapaxes(0, 2)
 
+    def _discardUncompressed(self):  # also release Anvil-only members
+        self._Blocks = None
+        self._Data = None
+        self._BlockLight = None
+        self._SkyLight = None
+
+        super(AnvilChunk, self)._discardUncompressed()  # clears self.root_tag
+
     def _compressChunk(self):
         sections = self.root_tag[Level][Sections] = nbt.TAG_List()