178 lines
5.5 KiB
Go
178 lines
5.5 KiB
Go
// Package adif handles ADIF import and export (ADI text format).
|
||
//
|
||
// ADI tokenisation rules (per ADIF spec):
|
||
// - Free-form text is allowed up to the first <EOH> (header end).
|
||
// - After <EOH>, records are sequences of <FIELDNAME:LENGTH[:TYPE]>VALUE
|
||
// terminated by <EOR>.
|
||
// - The LENGTH is the byte count of the VALUE that immediately follows
|
||
// the closing '>' (no separator).
|
||
// - Tag names are case-insensitive.
|
||
// - Bytes between fields (whitespace, junk) are ignored.
|
||
package adif
|
||
|
||
import (
|
||
"bufio"
|
||
"fmt"
|
||
"io"
|
||
"strconv"
|
||
"strings"
|
||
"unicode/utf8"
|
||
)
|
||
|
||
// Record is a single ADIF record. Keys are lowercased field names.
|
||
// Values are stored as strings: the field's raw bytes when no decoder is in
// use, or the decoder's output otherwise. A field name that occurs more than
// once within one record keeps only the value assigned last.
type Record map[string]string
|
||
|
||
// Parse reads an ADI stream and invokes fn for each record (after <EOH>).
|
||
// Returning a non-nil error from fn stops parsing and is propagated.
|
||
// The header (text before <EOH>) is silently discarded.
|
||
func Parse(r io.Reader, fn func(Record) error) error {
|
||
return parseWith(r, nil, fn)
|
||
}
|
||
|
||
// ParseWithDecoder is like Parse but applies decodeValue to each field's
|
||
// raw bytes before storing as a string. ADIF field lengths are byte
|
||
// counts in the file's native encoding, so decoding MUST happen after
|
||
// reading exactly N bytes — wrapping the reader in a decoder would shift
|
||
// byte boundaries and chop multibyte chars in half (e.g. "<QTH:7>YAOUNDÉ"
|
||
// in Windows-1252 is 7 bytes; after upfront decoding it'd be 8 bytes of
|
||
// UTF-8 and the parser would only read the first 7, splitting É).
|
||
func ParseWithDecoder(r io.Reader, decodeValue func([]byte) string, fn func(Record) error) error {
|
||
return parseWith(r, decodeValue, fn)
|
||
}
|
||
|
||
func parseWith(r io.Reader, decodeValue func([]byte) string, fn func(Record) error) error {
|
||
br := bufio.NewReaderSize(r, 64*1024)
|
||
|
||
rec := Record{}
|
||
headerDone := false
|
||
|
||
for {
|
||
// Seek next '<'. Bytes before it are either header text or
|
||
// inter-field whitespace — both discardable.
|
||
if err := seekByte(br, '<'); err != nil {
|
||
if err == io.EOF {
|
||
return nil
|
||
}
|
||
return err
|
||
}
|
||
spec, err := readUntilByte(br, '>')
|
||
if err != nil {
|
||
if err == io.EOF {
|
||
return nil
|
||
}
|
||
return fmt.Errorf("unterminated tag: %w", err)
|
||
}
|
||
name, length := parseSpec(spec)
|
||
switch name {
|
||
case "eoh":
|
||
headerDone = true
|
||
rec = Record{}
|
||
continue
|
||
case "eor":
|
||
if headerDone && len(rec) > 0 {
|
||
if err := fn(rec); err != nil {
|
||
return err
|
||
}
|
||
}
|
||
rec = Record{}
|
||
continue
|
||
}
|
||
// Skip value bytes regardless of header state; we only emit
|
||
// records once we've crossed <EOH>.
|
||
if length > 0 {
|
||
val := make([]byte, length)
|
||
if _, err := io.ReadFull(br, val); err != nil {
|
||
return fmt.Errorf("read field %s: %w", name, err)
|
||
}
|
||
// Repair character-count lengths. The ADIF spec says LENGTH is a
|
||
// byte count, but some loggers (notably Log4OM's UDP "ADIF
|
||
// message") write the CHARACTER count instead. For UTF-8 values
|
||
// with accented chars that truncates mid-rune — e.g. "<QTH:7>
|
||
// Tóalmás" is 9 bytes but says 7, leaving an orphan byte that
|
||
// renders as "Tóalm�". When we're in UTF-8 mode (no Windows-1252
|
||
// decoder) and the naive byte read isn't valid UTF-8, keep reading
|
||
// until the value holds `length` whole runes (or the next tag).
|
||
if decodeValue == nil && !utf8.Valid(val) {
|
||
val = extendToRunes(br, val, length)
|
||
}
|
||
if headerDone && name != "" {
|
||
if decodeValue != nil {
|
||
rec[name] = decodeValue(val)
|
||
} else {
|
||
rec[name] = string(val)
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// extendToRunes repairs a value whose declared length was really a character
// count instead of a byte count.
//
// have contains the first wantRunes BYTES of the value, which were found to
// be invalid UTF-8 (a multibyte rune got cut). Bytes are pulled from br one at
// a time and appended until one of the following happens:
//   - the value is valid UTF-8 and contains at least wantRunes runes;
//   - the next byte is '<', which starts the following tag (it is pushed back
//     so the caller still sees it);
//   - br reports an error, including EOF;
//   - the size cap is reached, so a genuinely corrupt value cannot run away.
//
// The possibly extended slice is returned.
func extendToRunes(br *bufio.Reader, have []byte, wantRunes int) []byte {
	const maxExtra = 8 // generous per-rune allowance for the few cut runes
	ceiling := len(have) + maxExtra*wantRunes + maxExtra

	// Keep going while the value is still broken or too short. Validity has
	// to be part of the test: counting runes alone is not enough, because a
	// dangling lead byte counts as one rune and would end the loop one
	// continuation byte too early.
	for len(have) < ceiling && (!utf8.Valid(have) || utf8.RuneCount(have) < wantRunes) {
		next, err := br.ReadByte()
		if err != nil {
			return have
		}
		if next == '<' {
			// Start of the following tag: put it back and stop here.
			_ = br.UnreadByte()
			return have
		}
		have = append(have, next)
	}
	return have
}
|
||
|
||
// parseSpec breaks the text found between '<' and '>' into a field name and
// a value length. Accepted shapes are "callsign:5", "callsign:5:S" and bare
// control tags such as "eor".
//
// The name is returned lowercased with surrounding whitespace removed. The
// length is the integer between the first and second colon; it is 0 when that
// part is absent, not a number, or not positive. Anything after the second
// colon (the type indicator) is ignored.
func parseSpec(spec string) (name string, length int) {
	body := strings.TrimSpace(spec)

	colon := strings.IndexByte(body, ':')
	if colon < 0 {
		// No length part at all: a control tag or a bare name.
		return strings.ToLower(body), 0
	}
	name = strings.ToLower(strings.TrimSpace(body[:colon]))

	// Isolate the text between the first colon and the next one, if any.
	digits := body[colon+1:]
	if end := strings.IndexByte(digits, ':'); end >= 0 {
		digits = digits[:end]
	}

	n, err := strconv.Atoi(strings.TrimSpace(digits))
	if err != nil || n <= 0 {
		return name, 0
	}
	return name, n
}
|
||
|
||
// seekByte consumes bytes from br up to and including the first occurrence
// of target. It returns nil once target has been consumed, or the error from
// br (io.EOF at end of input) if target was not found first.
func seekByte(br *bufio.Reader, target byte) error {
	c, err := br.ReadByte()
	for err == nil && c != target {
		c, err = br.ReadByte()
	}
	return err
}
|
||
|
||
// readUntilByte collects bytes from br until target is read. The target byte
// is consumed but not included in the result. If br fails first (io.EOF at
// end of input), the bytes gathered so far are returned together with that
// error.
func readUntilByte(br *bufio.Reader, target byte) (string, error) {
	var out strings.Builder
	c, err := br.ReadByte()
	for ; err == nil && c != target; c, err = br.ReadByte() {
		out.WriteByte(c)
	}
	return out.String(), err
}
|