// Package adif handles ADIF import and export (ADI text format). // // ADI tokenisation rules (per ADIF spec): // - Free-form text is allowed up to the first (header end). // - After , records are sequences of VALUE // terminated by . // - The LENGTH is the byte count of the VALUE that immediately follows // the closing '>' (no separator). // - Tag names are case-insensitive. // - Bytes between fields (whitespace, junk) are ignored. package adif import ( "bufio" "fmt" "io" "strconv" "strings" "unicode/utf8" ) // Record is a single ADIF record. Keys are lowercased field names. type Record map[string]string // Parse reads an ADI stream and invokes fn for each record (after ). // Returning a non-nil error from fn stops parsing and is propagated. // The header (text before ) is silently discarded. func Parse(r io.Reader, fn func(Record) error) error { return parseWith(r, nil, fn) } // ParseWithDecoder is like Parse but applies decodeValue to each field's // raw bytes before storing as a string. ADIF field lengths are byte // counts in the file's native encoding, so decoding MUST happen after // reading exactly N bytes — wrapping the reader in a decoder would shift // byte boundaries and chop multibyte chars in half (e.g. "YAOUNDÉ" // in Windows-1252 is 7 bytes; after upfront decoding it'd be 8 bytes of // UTF-8 and the parser would only read the first 7, splitting É). func ParseWithDecoder(r io.Reader, decodeValue func([]byte) string, fn func(Record) error) error { return parseWith(r, decodeValue, fn) } func parseWith(r io.Reader, decodeValue func([]byte) string, fn func(Record) error) error { br := bufio.NewReaderSize(r, 64*1024) rec := Record{} headerDone := false for { // Seek next '<'. Bytes before it are either header text or // inter-field whitespace — both discardable. 
if err := seekByte(br, '<'); err != nil { if err == io.EOF { return nil } return err } spec, err := readUntilByte(br, '>') if err != nil { if err == io.EOF { return nil } return fmt.Errorf("unterminated tag: %w", err) } name, length := parseSpec(spec) switch name { case "eoh": headerDone = true rec = Record{} continue case "eor": if headerDone && len(rec) > 0 { if err := fn(rec); err != nil { return err } } rec = Record{} continue } // Skip value bytes regardless of header state; we only emit // records once we've crossed . if length > 0 { val := make([]byte, length) if _, err := io.ReadFull(br, val); err != nil { return fmt.Errorf("read field %s: %w", name, err) } // Repair character-count lengths. The ADIF spec says LENGTH is a // byte count, but some loggers (notably Log4OM's UDP "ADIF // message") write the CHARACTER count instead. For UTF-8 values // with accented chars that truncates mid-rune — e.g. " // Tóalmás" is 9 bytes but says 7, leaving an orphan byte that // renders as "Tóalm�". When we're in UTF-8 mode (no Windows-1252 // decoder) and the naive byte read isn't valid UTF-8, keep reading // until the value holds `length` whole runes (or the next tag). if decodeValue == nil && !utf8.Valid(val) { val = extendToRunes(br, val, length) } if headerDone && name != "" { if decodeValue != nil { rec[name] = decodeValue(val) } else { rec[name] = string(val) } } } } } // extendToRunes recovers a value whose declared length was a character count // rather than a byte count. `have` holds the first `wantRunes` BYTES of the // value, which turned out to be invalid UTF-8 (a multibyte rune was cut). We // append bytes from br until the value holds `wantRunes` complete runes — or // until the next '<' (start of the following tag) / EOF, so we never cross // into another field. Capped so a genuinely-corrupt value can't run away. 
func extendToRunes(br *bufio.Reader, have []byte, wantRunes int) []byte {
	// Allow a handful of extra bytes per expected rune, plus a little
	// headroom, before giving up on the repair.
	const slack = 8
	budget := len(have) + slack*(wantRunes+1)

	// Keep pulling bytes while the value is either not well-formed UTF-8 or
	// still short of runes. Both tests are needed: a dangling lead byte is
	// counted as one rune by utf8.RuneCount, so a rune count alone would
	// stop one continuation byte too early.
	for len(have) < budget && (!utf8.Valid(have) || utf8.RuneCount(have) < wantRunes) {
		next, err := br.ReadByte()
		if err != nil {
			return have
		}
		if next == '<' {
			// Start of the following tag: push it back for the caller.
			_ = br.UnreadByte()
			return have
		}
		have = append(have, next)
	}
	return have
}

// parseSpec breaks the text found between '<' and '>' — for example
// "callsign:5", "callsign:5:S" or "eor" — into a field name and a length.
// The name is trimmed and lowercased. The length is the positive integer
// between the first and second ':'; it is 0 when that part is absent, not a
// number, or not positive.
func parseSpec(spec string) (name string, length int) {
	spec = strings.TrimSpace(spec)
	sep := strings.IndexByte(spec, ':')
	if sep < 0 {
		return strings.ToLower(spec), 0
	}
	name = strings.ToLower(strings.TrimSpace(spec[:sep]))

	// Keep only the LENGTH part, dropping an optional ":TYPE" suffix.
	digits := spec[sep+1:]
	if end := strings.IndexByte(digits, ':'); end >= 0 {
		digits = digits[:end]
	}
	n, err := strconv.Atoi(strings.TrimSpace(digits))
	if err != nil || n <= 0 {
		return name, 0
	}
	return name, n
}

// seekByte consumes bytes from br up to and including the first occurrence
// of target. It returns nil once target has been consumed, or the read error
// (io.EOF at end of input) if target never appears.
func seekByte(br *bufio.Reader, target byte) error {
	c, err := br.ReadByte()
	for err == nil && c != target {
		c, err = br.ReadByte()
	}
	return err
}

// readUntilByte returns the bytes read from br before the first occurrence
// of target; target itself is consumed but not returned. If a read error
// happens first, the bytes gathered so far are returned with that error.
func readUntilByte(br *bufio.Reader, target byte) (string, error) {
	chunk, err := br.ReadString(target)
	if err != nil {
		return chunk, err
	}
	// On success ReadString always ends with the delimiter; strip it.
	return chunk[:len(chunk)-1], nil
}