fixing an old unicode bug

the openos io buffer in utf8 mode can splice inside a utf8 sequence
this code prevents that by reading the next chunk to complete the sequence
in the case the stream actually has a bad utf8 sequence, the io buffer decides to return
more data than was asked for, rather than corrupt the stream
closes #1207
This commit is contained in:
payonel 2020-05-15 02:28:08 -07:00
parent 66bcd3f46a
commit a13792aef3

View File

@ -142,20 +142,36 @@ function buffer:readBytesOrChars(readChunk, n)
sub = unicode.sub
end
local data = ""
while len(data) ~= n do
if len(self.bufferRead) == 0 then
while true do
local current_data_len = len(data)
local needed = n - current_data_len
if needed < 1 then
break
end
-- if the buffer is empty OR there is only 1 char left, read next chunk
-- this is to protect that last byte from bad unicode
if #self.bufferRead == 0 then
local result, reason = readChunk(self)
if not result then
if reason then
return result, reason
else -- eof
return #data > 0 and data or nil
return current_data_len > 0 and data or nil
end
end
end
local left = n - len(data)
data = data .. sub(self.bufferRead, 1, left)
self.bufferRead = sub(self.bufferRead, left + 1)
local splice = self.bufferRead
if len(self.bufferRead) > needed then
splice = sub(self.bufferRead, 1, needed)
if len(splice) ~= needed then
-- this can happen if the stream does not represent valid utf8 sequences
-- we could search the string for the bad sequence but regardless, we're going to just return the raw data
splice = self.bufferRead -- yes this is more than the user is asking for, but this is better than corrupting the stream
end
-- else -- we will read more chunks
end
data = data .. splice
self.bufferRead = string.sub(self.bufferRead, #splice + 1)
end
return data
end