From 0e4399b053093a68568cb7816092b2a6f6208995 Mon Sep 17 00:00:00 2001 From: David O'Shea Date: Sat, 29 Sep 2012 23:19:08 +0930 Subject: [PATCH 1/4] Use UTF-8 for decoding of sign text from world When using the dumpSigns command, if non-ASCII characters were present in the sign text, a UnicodeDecodeError exception would occur when concatenating the text from the sign with 'u"\n"'. This fix decodes the sign text as UTF-8, which appears to be consistent with the behaviour of Minecraft 1.2.5 for the following sign text as dumped in Python using repr(): '\xc2\xa3$\xc2\xa3$\xc2\xa3$\xc2\xa3$\xc2\xa3$\xc2\xa3$\xc2\xa3$\xc2\xa3' Additionally, this fix ensures the output file includes a signature which makes it easier for tools to work out that the file contains UTF-8. --- mce.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/mce.py b/mce.py index d95dc8b..e7bafb6 100644 --- a/mce.py +++ b/mce.py @@ -604,7 +604,15 @@ class mce(object): else: filename = self.level.displayName + ".signs" - outFile = codecs.open(filename, "w", encoding='utf-8') + # It appears that Minecraft interprets the sign text as UTF-8, + # so we should decode it as such too. + decodeSignText = codecs.getdecoder('utf-8') + # We happen to encode the output file in UTF-8 too, although + # we could use another UTF encoding. The '-sig' encoding puts + # a signature at the start of the output file that tools such + # as Microsoft Windows Notepad and Emacs understand to mean + # the file has UTF-8 encoding. + outFile = codecs.open(filename, "w", encoding='utf-8-sig') print "Dumping signs..." 
signCount = 0 @@ -621,7 +629,8 @@ class mce(object): outFile.write(str(map(lambda x: tileEntity[x].value, "xyz")) + "\n") for i in range(4): - outFile.write(tileEntity["Text{0}".format(i + 1)].value + u"\n") + signText = tileEntity["Text{0}".format(i + 1)].value + outFile.write(decodeSignText(signText)[0] + u"\n") if i % 100 == 0: print "Chunk {0}...".format(i) From 96c155909e317d13a13d4b7e1f08efa651a9b60c Mon Sep 17 00:00:00 2001 From: David O'Shea Date: Sat, 29 Sep 2012 23:24:56 +0930 Subject: [PATCH 2/4] Correct references to ChunkMalformed exception in dumpSigns/dumpChests The exception handlers in the dumpSigns and dumpChests commands referenced ChunkMalformed in the wrong module. This fix corrects the references, but note that this by itself is not desirable, as ChunkMalformed is raised when a MemoryError occurs, and the exception handlers ignore the exception. As a MemoryError is not likely to go away by itself, this can result in a flood of error messages being printed, instead of the existing behaviour where execution of the command terminates due to the incorrect reference to ChunkMalformed. 
--- mce.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mce.py b/mce.py index e7bafb6..0ab70e7 100644 --- a/mce.py +++ b/mce.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +import mclevelbase import mclevel import infiniteworld import sys @@ -620,7 +621,7 @@ class mce(object): for i, cPos in enumerate(self.level.allChunks): try: chunk = self.level.getChunk(*cPos) - except mclevel.ChunkMalformed: + except mclevelbase.ChunkMalformed: continue for tileEntity in chunk.TileEntities: @@ -779,7 +780,7 @@ class mce(object): for i, cPos in enumerate(self.level.allChunks): try: chunk = self.level.getChunk(*cPos) - except mclevel.ChunkMalformed: + except mclevelbase.ChunkMalformed: continue for tileEntity in chunk.TileEntities: From 3f76761ec59c3fe3521b92db9586a49fb434f529 Mon Sep 17 00:00:00 2001 From: David O'Shea Date: Mon, 1 Oct 2012 08:52:46 +0930 Subject: [PATCH 3/4] Don't turn a MemoryError into a ChunkMalformed exception In a number of places, all exceptions were caught and a ChunkMalformed exception raised in their place. Some code, such as dumpSigns, continues after a ChunkMalformed exception. In the case where the ChunkMalformed exception was a result of a MemoryError, this will most likely result in a flood of error messages because the MemoryError is not likely to go away by itself. This fix causes MemoryError exceptions to not be transformed into ChunkMalformed exceptions, and hopefully covers all cases where this was done. Now, when memory is exhausted, the only detail that is output is "Exception during command: MemoryError()", rather than the previous, more detailed output, such as: """ ERROR:Malformed NBT data in file: r.4.0.mca index 3776 sector 552 length 1 format deflate () ERROR:Incorrect chunk format in file: r.4.0.mca index 3776 sector 552 length 1 format deflate (((MemoryError(),), )) """ but the particular data that was being processed at the time of the MemoryError is not likely to be useful information anyway. 
--- infiniteworld.py | 6 ++++++ schematic.py | 2 ++ 2 files changed, 8 insertions(+) diff --git a/infiniteworld.py b/infiniteworld.py index b2ceed9..fdaf646 100644 --- a/infiniteworld.py +++ b/infiniteworld.py @@ -799,6 +799,8 @@ class InfdevChunk(LightedChunk): try: self._decompressChunk() + except MemoryError: + raise except Exception, e: error(u"Malformed NBT data in file: {0} ({1})".format(self.filename, e)) if self.world: @@ -884,6 +886,8 @@ class InfdevChunk(LightedChunk): self.dataIsPacked = True self.decompress() + except MemoryError: + raise except Exception, e: error(u"Incorrect chunk format in file: {0} ({1})".format(self.filename, e)) if self.world: @@ -2731,6 +2735,8 @@ class MCInfdevOldLevel(ChunkedLevelMixin, EntityLevel): data = nbt.gunzip(cdata) chunk.root_tag = nbt.load(buf=data) + except MemoryError: + raise except Exception, e: raise ChunkMalformed("Chunk {0} had an error: {1!r}".format(chunk.chunkPosition, e), sys.exc_info()[2]) diff --git a/schematic.py b/schematic.py index f91e6fd..9a905f6 100644 --- a/schematic.py +++ b/schematic.py @@ -132,6 +132,8 @@ class MCSchematic (EntityLevel): try: self.root_tag = nbt.load(buf=data) + except MemoryError: + raise except Exception, e: error(u"Malformed NBT data in schematic file: {0} ({1})".format(self.filename, e)) raise ChunkMalformed((e, self.filename), sys.exc_info()[2]) From 4378e6af26977a4b9fb87648709037d8d7b2c8e0 Mon Sep 17 00:00:00 2001 From: David O'Shea Date: Mon, 1 Oct 2012 14:43:29 +0930 Subject: [PATCH 4/4] Fix memory leak for Anvil chunks When compressing a chunk, the uncompressed tag structure in self.root_tag is thrown away, but an AnvilChunk stores uncompressed data in four other member variables (_Blocks, _Data, _BlockLight and _SkyLight) too. This fix factors out the discarding of self.root_tag into a new method _discardUncompressed(), and overrides this method in AnvilChunk to also throw away the Anvil-specific members. 
--- infiniteworld.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/infiniteworld.py b/infiniteworld.py index fdaf646..67f9add 100644 --- a/infiniteworld.py +++ b/infiniteworld.py @@ -720,6 +720,9 @@ class InfdevChunk(LightedChunk): root_tag.save(buf=buf) return deflate(buf.getvalue()) + def _discardUncompressed(self): + self.root_tag = None + def _compressChunk(self): root_tag = self.root_tag if root_tag is None: @@ -730,7 +733,7 @@ class InfdevChunk(LightedChunk): if self.compressMode == MCRegionFile.VERSION_DEFLATE: self.compressedTag = self.compressTagDeflate(root_tag) - self.root_tag = None + self._discardUncompressed() def decompressTagGzip(self, data): return nbt.load(buf=nbt.gunzip(data)) @@ -774,7 +777,7 @@ class InfdevChunk(LightedChunk): if not self.dirty: # if we are not dirty, just throw the # uncompressed tag structure away. rely on the OS disk cache. - self.root_tag = None + self._discardUncompressed() else: if self.root_tag is not None: self.sanitizeBlocks() # xxx @@ -1113,6 +1116,14 @@ class AnvilChunk(InfdevChunk): arr[..., y:y + 16] = secarray.swapaxes(0, 2) + def _discardUncompressed(self): + self._Blocks = None + self._Data = None + self._BlockLight = None + self._SkyLight = None + + super(AnvilChunk, self)._discardUncompressed() + def _compressChunk(self): sections = self.root_tag[Level][Sections] = nbt.TAG_List()