feat: Winkeyer
This commit is contained in:
@@ -16,6 +16,7 @@ import (
|
||||
"io"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// Record is a single ADIF record. Keys are lowercased field names.
|
||||
@@ -83,6 +84,17 @@ func parseWith(r io.Reader, decodeValue func([]byte) string, fn func(Record) err
|
||||
if _, err := io.ReadFull(br, val); err != nil {
|
||||
return fmt.Errorf("read field %s: %w", name, err)
|
||||
}
|
||||
// Repair character-count lengths. The ADIF spec says LENGTH is a
|
||||
// byte count, but some loggers (notably Log4OM's UDP "ADIF
|
||||
// message") write the CHARACTER count instead. For UTF-8 values
|
||||
// with accented chars that truncates mid-rune — e.g. "<QTH:7>
|
||||
// Tóalmás" is 9 bytes but says 7, leaving an orphan byte that
|
||||
// renders as "Tóalm�". When we're in UTF-8 mode (no Windows-1252
|
||||
// decoder) and the naive byte read isn't valid UTF-8, keep reading
|
||||
// until the value holds `length` whole runes (or the next tag).
|
||||
if decodeValue == nil && !utf8.Valid(val) {
|
||||
val = extendToRunes(br, val, length)
|
||||
}
|
||||
if headerDone && name != "" {
|
||||
if decodeValue != nil {
|
||||
rec[name] = decodeValue(val)
|
||||
@@ -94,6 +106,37 @@ func parseWith(r io.Reader, decodeValue func([]byte) string, fn func(Record) err
|
||||
}
|
||||
}
|
||||
|
||||
// extendToRunes recovers a value whose declared length was a character count
// rather than a byte count.
//
// have holds the bytes already read for the value (the first wantRunes BYTES),
// which turned out not to be valid UTF-8 — typically because a multibyte rune
// was cut in half. Bytes are appended from br one at a time until have is
// valid UTF-8 and holds at least wantRunes runes. Reading also stops, and
// whatever was collected so far is returned, when:
//   - the next byte is '<' (start of the following tag); it is pushed back so
//     the caller still sees it and we never cross into another field,
//   - br returns an error (including io.EOF), or
//   - the size cap is reached, so a genuinely corrupt value can't run away.
//
// A value that is invalid for some other reason (e.g. a stray Latin-1 byte)
// can never become valid, so it absorbs bytes up to the next '<', error or cap.
//
// The value is decoded incrementally: every byte is examined once instead of
// re-validating the whole slice after each appended byte, which keeps long
// values linear rather than quadratic. The returned slice may share have's
// backing array.
func extendToRunes(br *bufio.Reader, have []byte, wantRunes int) []byte {
	// Generous cap on growth: a rune never needs more than utf8.UTFMax (4)
	// bytes, so 8 bytes per declared rune (plus a little) is ample slack.
	const maxExtra = 8
	limit := len(have) + maxExtra*wantRunes + maxExtra

	var (
		scanned int  // have[:scanned] has been decoded into whole runes
		runes   int  // number of runes found in have[:scanned]
		corrupt bool // have[:scanned] contains bytes no suffix can repair
	)
	for len(have) < limit {
		// Decode everything that is decodable so far. FullRune is false only
		// for a trailing strict prefix of a multibyte rune — exactly the
		// bytes that later reads may still complete — so we leave those
		// pending instead of counting them as an error rune.
		for scanned < len(have) && utf8.FullRune(have[scanned:]) {
			r, size := utf8.DecodeRune(have[scanned:])
			if r == utf8.RuneError && size == 1 {
				// Invalid encoding (a real U+FFFD decodes with size 3).
				corrupt = true
			}
			scanned += size
			runes++
		}

		// Stop only when the value is complete UTF-8 (no pending partial
		// rune, no invalid bytes) AND holds enough runes. Counting runes
		// alone is a trap: an orphan lead byte (e.g. the D0 of a cut
		// Cyrillic "а") would count as one rune and we would stop one
		// continuation byte short.
		if !corrupt && scanned == len(have) && runes >= wantRunes {
			break
		}

		b, err := br.ReadByte()
		if err != nil {
			break
		}
		if b == '<' {
			// Belongs to the next tag — leave it. UnreadByte cannot fail
			// directly after a successful ReadByte.
			_ = br.UnreadByte()
			break
		}
		have = append(have, b)
	}
	return have
}
|
||||
|
||||
// parseSpec splits "callsign:5", "callsign:5:S" or "eor" into name and length.
|
||||
// name is lowercased; length is 0 for control tags or when missing.
|
||||
func parseSpec(spec string) (name string, length int) {
|
||||
|
||||
Reference in New Issue
Block a user