fix multibyte decoding protocol bug

author Christophe Rhodes <csr21@cantab.net>

Fri, 3 Oct 2014 17:03:52 +0000 (18:03 +0100)

committer Christophe Rhodes <csr21@cantab.net>

Fri, 3 Oct 2014 17:03:52 +0000 (18:03 +0100)
author Christophe Rhodes <csr21@cantab.net>
Fri, 3 Oct 2014 17:03:52 +0000 (18:03 +0100)
committer Christophe Rhodes <csr21@cantab.net>
Fri, 3 Oct 2014 17:03:52 +0000 (18:03 +0100)
diff --git a/BUGS.org b/BUGS.org

index 5ec0d117ec2698cc416004e03b084426baf7a37c..3da8de254aff05342dea1a5c090833bca821505e 100644 (file)
--- a/BUGS.org
+++ b/BUGS.org
@@ -15,7 +15,7 @@
    with a reference to the corresponding source.  Unfortunately, emacs
    only passes the buffer position in bytes (or maybe characters),
    whereas R's srcrefs work with lines and columns.
-* OPEN #4 multibyte characters corrupt slime connection              :NORMAL:
+* RESOLVED #4 multibyte characters corrupt slime connection          :NORMAL:
    Not in all circumstances (e.g. ="£"= is OK) but =1:£= fails in
    slime-net-read-or-lose.
  * RESOLVED #5 respect visibility of evaluated results        :WISHLIST:FIXED:
@@ -61,7 +61,7 @@
  * OPEN #16 ESS configuration                                          :MINOR:
    sorting out the function regexp at least, but generally reducing
    dependence might be good.
-* OPEN #17 encoding / external-format confusion                      :NORMAL:
+* RESOLVED #17 encoding / external-format confusion                  :NORMAL:
    We declare ourselves capable of handling utf-8-unix encoding, but
    whether we actually do anything close to being correct is unclear.
    (Almost certainly not; I suspect we naïvely use nchar() in places).
diff --git a/swank.R b/swank.R

index 57ead0ac9f803a1d2729f737d739c0d4a10fcb69..901f1c16a5bc7f1af0015a73b7bd352c4adf36b3 100644 (file)
--- a/swank.R
+++ b/swank.R
@@ -141,19 +141,27 @@ readPacket <- function(io) {
    header <- readChunk(io, 6)
    len <- strtoi(header, base=16)
    payload <- readChunk(io, len)
-  readSexpFromString(payload)
+  sexp <- readSexpFromString(payload)
+  sexp
  }
  
  readChunk <- function(io, len) {
-  buffer <- readChar(io, len)
+  buffer <- readChar(io, len, useBytes=TRUE)
    if(length(buffer) == 0) {
      condition <- simpleCondition("End of file on io")
      class(condition) <- c("endOfFile", class(condition))
      signalCondition(condition)
    }
-  if(nchar(buffer) != len) {
-    stop("short read in readChunk")
-  }
+  ## FIXME: with the useBytes argument to readChar, it is normal for
+  ## the buffer returned to contain fewer characters than bytes were read,
+  ## given the possibility of multibyte characters.  However, that
+  ## means we can’t detect at all the case where there is actually a
+  ## short read (though empirically the readChar call blocks rather
+  ## than returning early).
+  ##
+  ## if(nchar(buffer) != len) {
+  ##   stop("short read in readChunk")
+  ## }
    buffer
  }
author	Christophe Rhodes <csr21@cantab.net>
	Fri, 3 Oct 2014 17:03:52 +0000 (18:03 +0100)
committer	Christophe Rhodes <csr21@cantab.net>
	Fri, 3 Oct 2014 17:03:52 +0000 (18:03 +0100)