// Command tft-convert converts a tab formatted table (columns aligned with
// tab characters, first line being the header) into CSV, a JSON array, or a
// stream of JSON objects.
//
// This file is the Go source carried by the patch
//
//	commit bf0f4b9eda41c6572e90a2d6317839e0d2ee8685
//	From:    Buddy Sandidge
//	Date:    Mon, 23 Oct 2023 18:22:28 -0700
//	Subject: Initial commit of tab formatted table convert app
//
// with document.go, main.go and parser.go merged into one package main file.
// The same commit also adds:
//
//	.gitignore:  /tft-convert
//	             /.idea/
//	go.mod:      module git.buddy.wtf/apps/tft-convert
//	             go 1.21.3
package main

import (
	"bufio"
	"bytes"
	"encoding/csv"
	"encoding/json"
	"errors"
	"flag"
	"fmt"
	"io"
	"os"
	"strings"
)

// ---------------------------------------------------------------------------
// document.go
// ---------------------------------------------------------------------------

// Document is a fully parsed table: one header row plus the data rows.
type Document struct {
	Header []string
	Rows   [][]string
}

// MarshalJSON encodes the document as a JSON array holding one object per
// row. Object members are written in header order (which a plain map would
// not guarantee) and duplicate header names are disambiguated by keys.
//
// It returns an error, instead of panicking, when a row has more cells than
// there are headers. Rows with fewer cells simply omit the trailing keys.
func (d Document) MarshalJSON() ([]byte, error) {
	names := keys(d.Header)

	// Encode every key once rather than once per row.
	encodedKeys := make([][]byte, len(names))
	for i, name := range names {
		b, err := json.Marshal(name)
		if err != nil {
			return nil, fmt.Errorf("failed to marshal %s: %w", name, err)
		}
		encodedKeys[i] = b
	}

	var buf bytes.Buffer
	buf.WriteByte('[')
	for i, row := range d.Rows {
		if len(row) > len(names) {
			return nil, fmt.Errorf("row %d has %d columns but only %d headers", i, len(row), len(names))
		}
		if i > 0 {
			buf.WriteByte(',')
		}
		buf.WriteByte('{')
		for j, col := range row {
			val, err := json.Marshal(col)
			if err != nil {
				return nil, fmt.Errorf("failed to marshal %s: %w", names[j], err)
			}
			if j > 0 {
				buf.WriteByte(',')
			}
			buf.Write(encodedKeys[j])
			buf.WriteByte(':')
			buf.Write(val)
		}
		buf.WriteByte('}')
	}
	buf.WriteByte(']')
	return buf.Bytes(), nil
}

// setHeader stores the header row. It always returns nil; the error result
// exists so the method can be used as an iter.OnHeaders callback.
func (d *Document) setHeader(h []string) error {
	d.Header = h
	return nil
}

// addRow appends one data row. It always returns nil; the error result
// exists so the method can be used as an iter.OnRow callback.
func (d *Document) addRow(row []string) error {
	d.Rows = append(d.Rows, row)
	return nil
}

// keys returns list with duplicates made unique by appending "_1", "_2", ...
// to the second and later occurrences, skipping suffixes already taken.
func keys(list []string) []string {
	unique := make([]string, 0, len(list))
	seen := make(map[string]struct{}, len(list))
	for _, h := range list {
		name := h
		for n := 1; ; n++ {
			if _, taken := seen[name]; !taken {
				break
			}
			name = fmt.Sprintf("%s_%d", h, n)
		}
		seen[name] = struct{}{}
		unique = append(unique, name)
	}
	return unique
}

// ---------------------------------------------------------------------------
// main.go
// ---------------------------------------------------------------------------

// Output formats accepted by the -format flag.
const (
	FormatCSV    = "csv"
	FormatJSON   = "json"
	FormatStream = "stream"
)

// UnknownFormatError reports a -format value that is not one of the Format
// constants.
type UnknownFormatError struct{ Format string }

// Error implements the error interface.
func (e UnknownFormatError) Error() string {
	return "unknown format: '" + e.Format + "'"
}

func main() {
	if err := run(); err != nil {
		// Terminate the message with a newline so the shell prompt does not
		// end up glued to it.
		fmt.Fprintln(os.Stderr, "ERROR: "+err.Error())
		os.Exit(1)
	}
}

// run parses the command line flags, opens the input and output (stdin and
// stdout by default) and converts the table in the requested format.
func run() error {
	var (
		in, out, format string
		width           int
	)
	flag.StringVar(&in, "in", "", "input file")
	flag.StringVar(&out, "out", "", "output file")
	flag.StringVar(&format, "format", "csv", "output format (csv,json,stream)")
	flag.IntVar(&width, "width", DefaultTabWidth, "spaces per tab")
	flag.Parse()

	// Validate the format before touching the file system: os.Create
	// truncates an existing output file, which must not happen for a request
	// that is going to be rejected anyway.
	switch format {
	case FormatCSV, FormatJSON, FormatStream:
	default:
		return UnknownFormatError{Format: format}
	}

	var input io.ReadCloser = os.Stdin
	if in != "" {
		f, err := os.Open(in)
		if err != nil {
			return fmt.Errorf("failed to get input %s: %w", in, err)
		}
		input = f
	}

	var output io.WriteCloser = os.Stdout
	if out != "" {
		f, err := os.Create(out)
		if err != nil {
			// Do not leak the already opened input.
			return errors.Join(fmt.Errorf("failed to get output %s: %w", out, err), input.Close())
		}
		output = f
	}

	err := convert(Parser{Width: width}, format, input, output)
	return errors.Join(err, input.Close(), output.Close())
}

// convert reads a tab formatted table from input and writes it to output in
// the given format. It returns UnknownFormatError for an unsupported format.
func convert(p Parser, format string, input io.Reader, output io.Writer) error {
	switch format {
	case FormatCSV:
		w := csv.NewWriter(output)
		// Write the header as soon as it is parsed so that a table without
		// data rows still yields its header line.
		err := iter{
			Reader:    input,
			Width:     p.width(),
			OnHeaders: w.Write,
			OnRow:     w.Write,
		}.Run()
		w.Flush()
		if err == nil {
			// Flush reports failures only through Error.
			err = w.Error()
		}
		return err

	case FormatStream:
		enc := json.NewEncoder(output)
		return p.IterMap(input, func(values map[string]string) error {
			return enc.Encode(values)
		})

	case FormatJSON:
		doc, err := p.Parse(input)
		if err != nil {
			return fmt.Errorf("failed to parse input: %w", err)
		}
		return json.NewEncoder(output).Encode(doc)

	default:
		return UnknownFormatError{Format: format}
	}
}

// ---------------------------------------------------------------------------
// parser.go
// ---------------------------------------------------------------------------

// DefaultTabWidth is the number of columns a tab advances to when no valid
// width is configured.
const DefaultTabWidth = 8

// maxLineBytes is the longest input line accepted by iter.Run.
const maxLineBytes = 1 << 20

// Token describes one header cell: its text, the column (in runes of the
// tab-expanded line) where it starts, and the width of its column.
type Token struct {
	Name   string
	Pos    int
	Length int
}

// Parser reads tab formatted tables. Width is the tab width in columns;
// values <= 0 select DefaultTabWidth.
type Parser struct {
	Width int
}

// IterMap calls onValue once per data row with a map from header name to
// cell value. Duplicate header names are disambiguated the same way as in
// Document.MarshalJSON so that no column is silently overwritten.
func (p Parser) IterMap(r io.Reader, onValue func(map[string]string) error) error {
	var names []string
	return iter{
		Reader: r,
		Width:  p.width(),
		OnHeaders: func(h []string) error {
			names = keys(h)
			return nil
		},
		OnRow: func(row []string) error {
			return onValue(zip(names, row))
		},
	}.Run()
}

// IterSlice calls onValue once per data row with the header names and the
// cell values of that row.
func (p Parser) IterSlice(r io.Reader, onValue func([]string, []string) error) error {
	var headers []string
	return iter{
		Reader: r,
		Width:  p.width(),
		OnHeaders: func(h []string) error {
			headers = h
			return nil
		},
		OnRow: func(row []string) error {
			return onValue(headers, row)
		},
	}.Run()
}

// Parse reads the whole table into a Document. On error the returned
// Document holds whatever was parsed before the failure.
func (p Parser) Parse(r io.Reader) (Document, error) {
	var doc Document
	// Run must complete before doc is read. Writing both in one return
	// statement leaves the order of the variable read and the call
	// unspecified, so the caller could receive an empty Document.
	err := iter{
		Reader:    r,
		Width:     p.width(),
		OnHeaders: doc.setHeader,
		OnRow:     doc.addRow,
	}.Run()
	return doc, err
}

// width returns the configured tab width, or DefaultTabWidth when the
// configured value is zero or negative.
func (p Parser) width() int {
	return tabWidth(p.Width)
}

// tabWidth maps non-positive widths to DefaultTabWidth.
func tabWidth(width int) int {
	if width <= 0 {
		return DefaultTabWidth
	}
	return width
}

// iter drives a single pass over a table: the first line is tokenized into
// headers and passed to OnHeaders, every following line is split into cells
// and passed to OnRow.
type iter struct {
	Reader    io.Reader
	Width     int
	OnHeaders func([]string) error
	OnRow     func([]string) error
}

// Run reads the table line by line and invokes the callbacks. It stops at
// the first read or callback error. Empty input invokes no callback.
func (i iter) Run() error {
	s := bufio.NewScanner(i.Reader)
	// Allow lines longer than the scanner's 64 KiB default.
	s.Buffer(make([]byte, 0, 64*1024), maxLineBytes)

	var tokens []Token
	if s.Scan() {
		tokens = i.tokens(s.Text())
		headers := make([]string, len(tokens))
		for n, tok := range tokens {
			headers[n] = tok.Name
		}
		if err := i.OnHeaders(headers); err != nil {
			return fmt.Errorf("failed to handle headers: %w", err)
		}
	}
	if err := s.Err(); err != nil {
		return fmt.Errorf("failed to read header line: %w", err)
	}

	for s.Scan() {
		if err := i.OnRow(i.parse(tokens, s.Text())); err != nil {
			return fmt.Errorf("failed to handle values: %w", err)
		}
	}
	if err := s.Err(); err != nil {
		return fmt.Errorf("failed to read input: %w", err)
	}
	return nil
}

// tokens splits the header line into Tokens. Positions and lengths are
// measured in runes of the tab-expanded line. A run of tabs after a header
// widens that header's column by one tab width per extra tab.
func (i iter) tokens(line string) []Token {
	width := tabWidth(i.Width)
	var (
		tokens []Token
		word   []rune
		start  int // column where the current word began
		col    int // current column in the tab-expanded line
	)
	for _, char := range line {
		if char != '\t' {
			if len(word) == 0 {
				start = col
			}
			word = append(word, char)
			col++
			continue
		}
		if len(word) == 0 {
			// Consecutive tab: the previous column is one tab stop wider.
			col += width
			if len(tokens) > 0 {
				tokens[len(tokens)-1].Length += width
			}
			continue
		}
		// A word always starts on a tab stop, so this pads to the next one.
		padding := width - len(word)%width
		col += padding
		tokens = append(tokens, Token{
			Name:   string(word),
			Pos:    start,
			Length: len(word) + padding,
		})
		word = word[:0]
	}

	if len(word) != 0 {
		tokens = append(tokens, Token{
			Name:   string(word),
			Pos:    start,
			Length: len(word),
		})
	}
	return tokens
}

// parse cuts one data line into cells using the column layout in headings.
// The line is tab-expanded and sliced by rune, matching the rune-based
// Token positions, so multi-byte characters do not shift columns. The last
// column extends to the end of the line; columns past the end of the line
// are empty. Cells are trimmed of surrounding white space.
func (i iter) parse(headings []Token, l string) []string {
	line := []rune(removeTabs(i.Width, l))
	values := make([]string, len(headings))
	last := len(headings) - 1
	for j, heading := range headings {
		start := heading.Pos
		if start >= len(line) {
			continue // line ends before this column: leave the cell empty
		}
		end := start + heading.Length
		if j == last || end > len(line) {
			end = len(line)
		}
		values[j] = strings.TrimSpace(string(line[start:end]))
	}
	return values
}

// zip builds a map from keys[i] to values[i]. Keys without a matching value
// map to the zero value of V; surplus values are ignored.
func zip[K comparable, V any](keys []K, values []V) map[K]V {
	ret := make(map[K]V, len(keys))
	var zero V
	for i, key := range keys {
		if i < len(values) {
			ret[key] = values[i]
		} else {
			ret[key] = zero
		}
	}
	return ret
}

// removeTabs replaces every tab in line with the spaces needed to reach the
// next tab stop, counting columns in runes. Non-positive widths select
// DefaultTabWidth.
func removeTabs(width int, line string) string {
	width = tabWidth(width)
	var ret strings.Builder
	ret.Grow(len(line))
	wordLen := 0 // runes written since the last tab stop
	for _, char := range line {
		if char != '\t' {
			ret.WriteRune(char)
			wordLen++
			continue
		}
		ret.WriteString(spaces(width - wordLen%width))
		wordLen = 0
	}
	return ret.String()
}

// spaces returns a string of length spaces, or "" when length is negative.
func spaces(length int) string {
	if length <= 0 {
		return ""
	}
	return strings.Repeat(" ", length)
}