rigs completed
This commit is contained in:
+50
-6
@@ -1,12 +1,17 @@
|
||||
package adif
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/encoding/charmap"
|
||||
|
||||
"hamlog/internal/qso"
|
||||
)
|
||||
@@ -33,23 +38,62 @@ type Importer struct {
|
||||
SkipDuplicates bool // when true, records matching an existing or earlier-in-file QSO are skipped; otherwise all are inserted
|
||||
}
|
||||
|
||||
// ImportFile opens the file at path and imports it into the repo.
|
||||
// ImportFile reads the file at path and imports it into the repo. The
|
||||
// whole file is loaded into memory so we can do a definitive UTF-8 check
|
||||
// before parsing — peeking a buffered window misses non-ASCII bytes that
|
||||
// only appear past the header (typical when the ADIF header is pure ASCII
|
||||
// but record fields like NAME/QTH have accented chars in Windows-1252).
|
||||
func (im *Importer) ImportFile(ctx context.Context, path string) (ImportResult, error) {
|
||||
f, err := os.Open(path)
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return ImportResult{}, fmt.Errorf("open %s: %w", path, err)
|
||||
}
|
||||
defer f.Close()
|
||||
return im.Import(ctx, f)
|
||||
// Strip UTF-8 BOM if present so the parser sees clean data.
|
||||
data = bytes.TrimPrefix(data, []byte{0xEF, 0xBB, 0xBF})
|
||||
return im.importBytes(ctx, data)
|
||||
}
|
||||
|
||||
// Import streams the ADI content from r into the repo.
|
||||
// pickValueDecoder returns the per-field byte-to-string decoder to use.
|
||||
// If the file is valid UTF-8 we keep the bytes as-is; otherwise we assume
|
||||
// Windows-1252 (de-facto encoding of MixW, Log4OM, HRD and most legacy
|
||||
// Western-European loggers). Decoding has to happen on each field's bytes
|
||||
// individually, NOT by wrapping the reader, because ADIF declares field
|
||||
// lengths in source-encoding bytes — e.g. "<QTH:7>YAOUNDÉ" is 7 bytes in
|
||||
// Windows-1252 (É is one byte 0xC9). Pre-decoding to UTF-8 would make É
|
||||
// two bytes, and the parser reading 7 bytes after the tag would chop the
|
||||
// É in half → "YAOUND" + an orphan 0xC3 byte → "YAOUND�" after JSON.
|
||||
func pickValueDecoder(data []byte) func([]byte) string {
|
||||
if utf8.Valid(data) {
|
||||
return nil // identity
|
||||
}
|
||||
dec := charmap.Windows1252.NewDecoder()
|
||||
return func(b []byte) string {
|
||||
out, err := dec.Bytes(b)
|
||||
if err != nil {
|
||||
return string(b)
|
||||
}
|
||||
return string(out)
|
||||
}
|
||||
}
|
||||
|
||||
// Import streams the ADI content from r into the repo. Assumes UTF-8;
|
||||
// callers that may receive other encodings should go through ImportFile.
|
||||
func (im *Importer) Import(ctx context.Context, r interface {
|
||||
Read(p []byte) (int, error)
|
||||
}) (ImportResult, error) {
|
||||
data, err := io.ReadAll(r)
|
||||
if err != nil {
|
||||
return ImportResult{}, fmt.Errorf("read input: %w", err)
|
||||
}
|
||||
data = bytes.TrimPrefix(data, []byte{0xEF, 0xBB, 0xBF})
|
||||
return im.importBytes(ctx, data)
|
||||
}
|
||||
|
||||
func (im *Importer) importBytes(ctx context.Context, data []byte) (ImportResult, error) {
|
||||
if im.BatchSize <= 0 {
|
||||
im.BatchSize = 500
|
||||
}
|
||||
decode := pickValueDecoder(data)
|
||||
res := ImportResult{}
|
||||
batch := make([]qso.QSO, 0, im.BatchSize)
|
||||
|
||||
@@ -73,7 +117,7 @@ func (im *Importer) Import(ctx context.Context, r interface {
|
||||
return err
|
||||
}
|
||||
|
||||
err = Parse(r, func(rec Record) error {
|
||||
err = ParseWithDecoder(bytes.NewReader(data), decode, func(rec Record) error {
|
||||
res.Total++
|
||||
q, ok := recordToQSO(rec)
|
||||
if !ok {
|
||||
|
||||
+20
-1
@@ -25,6 +25,21 @@ type Record map[string]string
|
||||
// Returning a non-nil error from fn stops parsing and is propagated.
|
||||
// The header (text before <EOH>) is silently discarded.
|
||||
func Parse(r io.Reader, fn func(Record) error) error {
|
||||
return parseWith(r, nil, fn)
|
||||
}
|
||||
|
||||
// ParseWithDecoder is like Parse but applies decodeValue to each field's
|
||||
// raw bytes before storing as a string. ADIF field lengths are byte
|
||||
// counts in the file's native encoding, so decoding MUST happen after
|
||||
// reading exactly N bytes — wrapping the reader in a decoder would shift
|
||||
// byte boundaries and chop multibyte chars in half (e.g. "<QTH:7>YAOUNDÉ"
|
||||
// in Windows-1252 is 7 bytes; after upfront decoding it'd be 8 bytes of
|
||||
// UTF-8 and the parser would only read the first 7, splitting É).
|
||||
func ParseWithDecoder(r io.Reader, decodeValue func([]byte) string, fn func(Record) error) error {
|
||||
return parseWith(r, decodeValue, fn)
|
||||
}
|
||||
|
||||
func parseWith(r io.Reader, decodeValue func([]byte) string, fn func(Record) error) error {
|
||||
br := bufio.NewReaderSize(r, 64*1024)
|
||||
|
||||
rec := Record{}
|
||||
@@ -69,7 +84,11 @@ func Parse(r io.Reader, fn func(Record) error) error {
|
||||
return fmt.Errorf("read field %s: %w", name, err)
|
||||
}
|
||||
if headerDone && name != "" {
|
||||
rec[name] = string(val)
|
||||
if decodeValue != nil {
|
||||
rec[name] = decodeValue(val)
|
||||
} else {
|
||||
rec[name] = string(val)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user