feat: Winkeyer

2026-06-02 01:17:26 +02:00
parent 2eb77370e4
commit 2b4326b553
26 changed files with 3125 additions and 645 deletions
@@ -16,6 +16,7 @@ import (
 	"io"
 	"strconv"
 	"strings"
+	"unicode/utf8"
 )

 // Record is a single ADIF record. Keys are lowercased field names.
@@ -83,6 +84,17 @@ func parseWith(r io.Reader, decodeValue func([]byte) string, fn func(Record) err
 			if _, err := io.ReadFull(br, val); err != nil {
 				return fmt.Errorf("read field %s: %w", name, err)
 			}
+			// Repair character-count lengths. The ADIF spec says LENGTH is a
+			// byte count, but some loggers (notably Log4OM's UDP "ADIF
+			// message") write the CHARACTER count instead. For UTF-8 values
+			// with accented chars that truncates mid-rune — e.g. "<QTH:7>
+			// Tóalmás" is 9 bytes but says 7, leaving an orphan byte that
+			// renders as "Tóalm�". When we're in UTF-8 mode (no Windows-1252
+			// decoder) and the naive byte read isn't valid UTF-8, keep reading
+			// until the value holds `length` whole runes (or the next tag).
+			if decodeValue == nil && !utf8.Valid(val) {
+				val = extendToRunes(br, val, length)
+			}
 			if headerDone && name != "" {
 				if decodeValue != nil {
 					rec[name] = decodeValue(val)
@@ -94,6 +106,37 @@ func parseWith(r io.Reader, decodeValue func([]byte) string, fn func(Record) err
 	}
 }

+// extendToRunes repairs a value whose declared length was a character count
+// rather than a byte count. have holds the first wantRunes BYTES of the value
+// as already read by the caller. Bytes are appended from br until have is
+// valid UTF-8 with at least wantRunes runes, or until the next '<' (unread and
+// left in br), a read error such as EOF, or the size cap computed below. The
+// possibly-extended slice is returned; read errors are not reported.
+func extendToRunes(br *bufio.Reader, have []byte, wantRunes int) []byte {
+	const maxExtra = 8 // growth allowance: applied per declared rune, plus once more as slack
+	limit := len(have) + maxExtra*wantRunes + maxExtra
+	for len(have) < limit {
+		// Stop only when the value is complete UTF-8 (no partial trailing
+		// rune) AND holds enough runes. Checking utf8.RuneCount alone is a
+		// trap: a trailing orphan lead byte (e.g. the D0 of a cut Cyrillic
+		// "а") counts as one rune, so the loop would stop one continuation
+		// byte short → "Чайк�". Requiring utf8.Valid forces us to read it.
+		if utf8.Valid(have) && utf8.RuneCount(have) >= wantRunes {
+			break
+		}
+		b, err := br.ReadByte()
+		if err != nil {
+			break // EOF or read failure: return what has been collected so far
+		}
+		if b == '<' {
+			_ = br.UnreadByte() // belongs to the next tag — leave it; directly follows a successful ReadByte
+			break
+		}
+		have = append(have, b)
+	}
+	return have
+}
+
 // parseSpec splits "callsign:5", "callsign:5:S" or "eor" into name and length.
 // name is lowercased; length is 0 for control tags or when missing.
 func parseSpec(spec string) (name string, length int) {