From ca3da1b2de4f69313c00d7832198ac44b55cf6e4 Mon Sep 17 00:00:00 2001 From: Dario Nieuwenhuis Date: Thu, 4 Nov 2021 19:25:43 +0100 Subject: [PATCH] Split python code in a few modules --- .gitignore | 1 + parse.py => stm32data/__main__.py | 275 +----------------------------- stm32data/header.py | 208 ++++++++++++++++++++++ stm32data/util.py | 13 ++ stm32data/yaml.py | 64 +++++++ 5 files changed, 294 insertions(+), 267 deletions(-) rename parse.py => stm32data/__main__.py (84%) create mode 100644 stm32data/header.py create mode 100644 stm32data/util.py create mode 100644 stm32data/yaml.py diff --git a/.gitignore b/.gitignore index 800d8f1..2aac4e7 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ /tmp .idea/ transform*.yaml +__pycache__ \ No newline at end of file diff --git a/parse.py b/stm32data/__main__.py similarity index 84% rename from parse.py rename to stm32data/__main__.py index 0794535..a8a5909 100755 --- a/parse.py +++ b/stm32data/__main__.py @@ -2,12 +2,6 @@ import sys import xmltodict -import yaml - -try: - from yaml import CSafeLoader as SafeLoader -except ImportError: - from yaml import SafeLoader import re import json @@ -15,36 +9,9 @@ import os from collections import OrderedDict from glob import glob - -class DecimalInt: - def __init__(self, val): - self.val = val - - -def represent_decimal_int(dumper, data): - return dumper.represent_int(data.val) - - -yaml.add_representer(DecimalInt, represent_decimal_int) - - -class HexInt: - def __init__(self, val): - self.val = val - - -def represent_hex_int(dumper, data): - return dumper.represent_int(hex(data.val)) - - -yaml.add_representer(HexInt, represent_hex_int) - - -def removeprefix(value: str, prefix: str, /) -> str: - if value.startswith(prefix): - return value[len(prefix):] - else: - return value[:] +from stm32data import yaml, header +from stm32data.yaml import DecimalInt, HexInt +from stm32data.util import removeprefix, removesuffix def corename(d): @@ -58,38 +25,6 @@ def corename(d): return name -def removesuffix(value: str, suffix: str, /) -> str: - if value.endswith(suffix): - return value[:-len(suffix)] - else: - return value[:] - - -def represent_ordereddict(dumper, data): - value = [] - - for item_key, item_value in data.items(): - node_key = dumper.represent_data(item_key) - node_value = dumper.represent_data(item_value) - - value.append((node_key, node_value)) - - return yaml.nodes.MappingNode(u'tag:yaml.org,2002:map', value) - - -yaml.add_representer(OrderedDict, represent_ordereddict) - - -def hexint_presenter(dumper, data): - if data > 0x10000: - return dumper.represent_int(hex(data)) - else: - return dumper.represent_int(data) - - -yaml.add_representer(int, hexint_presenter) - - def children(x, key): r = x.get(key) if r is None: @@ -99,178 +34,6 @@ def children(x, key): return [r] -headers_parsed = {} -header_map = {} -with open('header_map.yaml', 'r') as f: - y = yaml.load(f, Loader=SafeLoader) - for header, chips in y.items(): - for chip in chips.split(','): - header_map[chip.strip().lower()] = header.lower() - - -def find_header(model): - # for a, b in header_map: - # model = re.sub(a, b, model, flags=re.IGNORECASE) - model = model.lower() - - # if it's in the map, just go - if r := header_map.get(model): - return r - - # if not, find it by regex, taking `x` meaning `anything` - res = [] - for h in headers_parsed.keys(): - if re.match('^' + h.replace('x', '.') + '$', model): - res.append(h) - - if len(res) == 0: - return None - assert len(res) == 1 - return res[0] - - -def paren_ok(val): - n = 0 - 
for c in val: - if c == '(': - n += 1 - if c == ')': - n -= 1 - if n < 0: - return False - return n == 0 - - -# warning: horrible abomination ahead - - -def parse_value(val, defines): - val = val.strip() - if val == '': - return 0 - if m := re.match('(0([1-9][0-9]*)(U))', val): - return int(m.group(2), 10) - if m := re.match('((0x[0-9a-fA-F]+|\\d+))(|u|ul|U|UL)$', val): - return int(m.group(1), 0) - if m := re.match('([0-9A-Za-z_]+)$', val): - return defines.get(m.group(1), 0) - if m := re.match('\\((.*)\\)$', val): - if paren_ok(m.group(1)): - return parse_value(m.group(1), defines) - if m := re.match('\\*?\\([0-9A-Za-z_]+ *\\*?\\)(.*)$', val): - return parse_value(m.group(1), defines) - # if m := re.match('\\*?\\(u?int(8|16|32|64)_t\\ *)(.*)$', val): - # return parse_value(m.group(1), defines) - if m := re.match('(.*)/(.*)$', val): - return parse_value(m.group(1), defines) / parse_value(m.group(2), defines) - if m := re.match('(.*)<<(.*)$', val): - return (parse_value(m.group(1), defines) << parse_value(m.group(2), defines)) & 0xFFFFFFFF - if m := re.match('(.*)>>(.*)$', val): - return parse_value(m.group(1), defines) >> parse_value(m.group(2), defines) - if m := re.match('(.*)\\|(.*)$', val): - return parse_value(m.group(1), defines) | parse_value(m.group(2), defines) - if m := re.match('(.*)&(.*)$', val): - return parse_value(m.group(1), defines) | parse_value(m.group(2), defines) - if m := re.match('~(.*)$', val): - return (~parse_value(m.group(1), defines)) & 0xFFFFFFFF - if m := re.match('(.*)\\+(.*)$', val): - return parse_value(m.group(1), defines) + parse_value(m.group(2), defines) - if m := re.match('(.*)-(.*)$', val): - return parse_value(m.group(1), defines) - parse_value(m.group(2), defines) - raise Exception("can't parse: " + val) - - -def parse_header(f): - irqs = {} - defines = {} - cores = [] - cur_core = 'all' - - accum = '' - for l in open(f, 'r', encoding='utf-8', errors='ignore'): - l = l.strip() - l = accum + l - if l.endswith('\\'): - accum = l[:-1] - continue - accum = '' - - # Scoped by a single core - if m := re.match('.*if defined.*CORE_CM(\\d+)(PLUS)?.*', l): - cur_core = "cm" + str(m.group(1)) - if m.group(2) != None: - cur_core += "p" - # print("Cur core is ", cur_core, "matched", l) - found = False - for core in cores: - if core == cur_core: - found = True - if not found: - cores.append(cur_core) - # print("Switching to core", cur_core, "for", f) - elif m := re.match('.*else.*', l): - cur_core = "all" - if m := re.match('.*else.*CORE_CM(\\d+)(PLUS)?.*', l): - cur_core = "cm" + str(m.group(1)) - if m.group(2) != None: - cur_core += "p" - # print("Cur core is ", cur_core, "matched", l) - elif len(cores) > 1: - # Pick the second core assuming we've already parsed one - cur_core = cores[1] - - found = False - for core in cores: - if core == cur_core: - found = True - if not found: - cores.append(cur_core) - # print("Switching to core", cur_core, "for", f) - elif m := re.match('.*endif.*', l): - # print("Switching to common core for", f) - cur_core = "all" - - if cur_core not in irqs: - # print("Registering new core", cur_core) - irqs[cur_core] = {} - if cur_core not in defines: - defines[cur_core] = {} - - if m := re.match('([a-zA-Z0-9_]+)_IRQn += (\\d+),? 
+/\\*!< (.*) \\*/', l): - # print("Found irq for", cur_core) - irqs[cur_core][m.group(1)] = int(m.group(2)) - - if m := re.match('#define +([0-9A-Za-z_]+)\\(', l): - defines[cur_core][m.group(1)] = -1 - if m := re.match('#define +([0-9A-Za-z_]+) +(.*)', l): - name = m.group(1) - val = m.group(2) - name = name.strip() - if name == 'FLASH_SIZE': - continue - val = val.split('/*')[0].strip() - val = parse_value(val, defines[cur_core]) - # print("Found define for", cur_core) - defines[cur_core][name] = val - - # print("Found", len(cores), "cores for", f) - # print("Found", len(irqs['all']), "shared interrupts for", f) - - if len(cores) == 0: - cores.append("all") - - for core in cores: - if core != "all": - irqs[core].update(irqs['all']) - defines[core].update(defines['all']) - - return { - 'cores': cores, - 'interrupts': irqs, - 'defines': defines, - } - - def expand_name(name): if '(' not in name: return [name] @@ -559,26 +322,6 @@ def documents_for(chip_name, type): return docs -def parse_headers(): - os.makedirs('sources/headers_parsed', exist_ok=True) - print('loading headers...') - for f in glob('sources/headers/*.h'): - # if 'stm32f4' not in f: continue - ff = removeprefix(f, 'sources/headers/') - ff = removesuffix(ff, '.h') - - try: - with open('sources/headers_parsed/{}.json'.format(ff), 'r') as j: - res = json.load(j) - except: - print(f) - res = parse_header(f) - with open('sources/headers_parsed/{}.json'.format(ff), 'w') as j: - json.dump(res, j) - - headers_parsed[ff] = res - - def chip_name_from_package_name(x): name_map = [ ('(STM32L1....).x([AX])', '\\1-\\2'), @@ -823,10 +566,9 @@ def parse_chips(): else: del chip['device-id'] - h = find_header(chip_name) + h = header.get_for_chip(chip_name) if h is None: raise Exception("missing header for {}".format(chip_name)) - h = headers_parsed[h] found = [] @@ -939,7 +681,7 @@ def parse_chips(): family_extra = "data/extra/family/" + chip['family'] + ".yaml" if os.path.exists(family_extra): with open(family_extra) as extra_f: - extra = yaml.load(extra_f, Loader=SafeLoader) + extra = yaml.load(extra_f) for (extra_name, extra_p) in extra['peripherals'].items(): peris[extra_name] = extra_p @@ -1195,7 +937,7 @@ def parse_dma(): dmamux_file = 'L4RS' for mf in glob('data/dmamux/{}_*.yaml'.format(dmamux_file)): with open(mf, 'r') as yaml_file: - y = yaml.load(yaml_file, Loader=SafeLoader) + y = yaml.load(yaml_file) mf = removesuffix(mf, '.yaml') dmamux = mf[mf.index('_') + 1:] # DMAMUX1 or DMAMUX2 @@ -1336,7 +1078,7 @@ def parse_rcc_regs(): ff = removesuffix(ff, '.yaml') family_clocks = {} with open(f, 'r') as yaml_file: - y = yaml.load(yaml_file, Loader=SafeLoader) + y = yaml.load(yaml_file) for (key, body) in y.items(): if key.startswith("fieldset/A") and key.endswith("ENR"): clock = removesuffix(key, "ENR") @@ -1480,7 +1222,7 @@ memories = [] def parse_memories(): with open('data/memories.yaml', 'r') as yaml_file: - m = yaml.load(yaml_file, Loader=SafeLoader) + m = yaml.load(yaml_file) for each in m: memories.append(each) @@ -1522,6 +1264,5 @@ parse_rcc_regs() parse_documentations() parse_dma() parse_gpio_af() -parse_headers() parse_clocks() parse_chips() diff --git a/stm32data/header.py b/stm32data/header.py new file mode 100644 index 0000000..81e3c9e --- /dev/null +++ b/stm32data/header.py @@ -0,0 +1,208 @@ +import re +import os +import json +from glob import glob + +from stm32data import yaml +from stm32data.util import removeprefix, removesuffix + + +headers_parsed = {} +header_map = {} +with open('header_map.yaml', 'r') as f: + y = 
yaml.load(f) + for header, chips in y.items(): + for chip in chips.split(','): + header_map[chip.strip().lower()] = header.lower() + + +def get_for_chip(model): + if header := get_header_name_for_chip(model): + return headers_parsed[header] + return None + + +def get_header_name_for_chip(model): + # for a, b in header_map: + # model = re.sub(a, b, model, flags=re.IGNORECASE) + model = model.lower() + + # if it's in the map, just go + if r := header_map.get(model): + return r + + # if not, find it by regex, taking `x` meaning `anything` + res = [] + for h in headers_parsed.keys(): + if re.match('^' + h.replace('x', '.') + '$', model): + res.append(h) + + if len(res) == 0: + return None + assert len(res) == 1 + return res[0] + + +def paren_ok(val): + n = 0 + for c in val: + if c == '(': + n += 1 + if c == ')': + n -= 1 + if n < 0: + return False + return n == 0 + + +# warning: horrible abomination ahead + + +def parse_value(val, defines): + val = val.strip() + if val == '': + return 0 + if m := re.match('(0([1-9][0-9]*)(U))', val): + return int(m.group(2), 10) + if m := re.match('((0x[0-9a-fA-F]+|\\d+))(|u|ul|U|UL)$', val): + return int(m.group(1), 0) + if m := re.match('([0-9A-Za-z_]+)$', val): + return defines.get(m.group(1), 0) + if m := re.match('\\((.*)\\)$', val): + if paren_ok(m.group(1)): + return parse_value(m.group(1), defines) + if m := re.match('\\*?\\([0-9A-Za-z_]+ *\\*?\\)(.*)$', val): + return parse_value(m.group(1), defines) + # if m := re.match('\\*?\\(u?int(8|16|32|64)_t\\ *)(.*)$', val): + # return parse_value(m.group(1), defines) + if m := re.match('(.*)/(.*)$', val): + return parse_value(m.group(1), defines) / parse_value(m.group(2), defines) + if m := re.match('(.*)<<(.*)$', val): + return (parse_value(m.group(1), defines) << parse_value(m.group(2), defines)) & 0xFFFFFFFF + if m := re.match('(.*)>>(.*)$', val): + return parse_value(m.group(1), defines) >> parse_value(m.group(2), defines) + if m := re.match('(.*)\\|(.*)$', val): + return parse_value(m.group(1), defines) | parse_value(m.group(2), defines) + if m := re.match('(.*)&(.*)$', val): + return parse_value(m.group(1), defines) | parse_value(m.group(2), defines) + if m := re.match('~(.*)$', val): + return (~parse_value(m.group(1), defines)) & 0xFFFFFFFF + if m := re.match('(.*)\\+(.*)$', val): + return parse_value(m.group(1), defines) + parse_value(m.group(2), defines) + if m := re.match('(.*)-(.*)$', val): + return parse_value(m.group(1), defines) - parse_value(m.group(2), defines) + raise Exception("can't parse: " + val) + + +def parse_header(f): + irqs = {} + defines = {} + cores = [] + cur_core = 'all' + + accum = '' + for l in open(f, 'r', encoding='utf-8', errors='ignore'): + l = l.strip() + l = accum + l + if l.endswith('\\'): + accum = l[:-1] + continue + accum = '' + + # Scoped by a single core + if m := re.match('.*if defined.*CORE_CM(\\d+)(PLUS)?.*', l): + cur_core = "cm" + str(m.group(1)) + if m.group(2) != None: + cur_core += "p" + # print("Cur core is ", cur_core, "matched", l) + found = False + for core in cores: + if core == cur_core: + found = True + if not found: + cores.append(cur_core) + # print("Switching to core", cur_core, "for", f) + elif m := re.match('.*else.*', l): + cur_core = "all" + if m := re.match('.*else.*CORE_CM(\\d+)(PLUS)?.*', l): + cur_core = "cm" + str(m.group(1)) + if m.group(2) != None: + cur_core += "p" + # print("Cur core is ", cur_core, "matched", l) + elif len(cores) > 1: + # Pick the second core assuming we've already parsed one + cur_core = cores[1] + + found = False + 
for core in cores: + if core == cur_core: + found = True + if not found: + cores.append(cur_core) + # print("Switching to core", cur_core, "for", f) + elif m := re.match('.*endif.*', l): + # print("Switching to common core for", f) + cur_core = "all" + + if cur_core not in irqs: + # print("Registering new core", cur_core) + irqs[cur_core] = {} + if cur_core not in defines: + defines[cur_core] = {} + + if m := re.match('([a-zA-Z0-9_]+)_IRQn += (\\d+),? +/\\*!< (.*) \\*/', l): + # print("Found irq for", cur_core) + irqs[cur_core][m.group(1)] = int(m.group(2)) + + if m := re.match('#define +([0-9A-Za-z_]+)\\(', l): + defines[cur_core][m.group(1)] = -1 + if m := re.match('#define +([0-9A-Za-z_]+) +(.*)', l): + name = m.group(1) + val = m.group(2) + name = name.strip() + if name == 'FLASH_SIZE': + continue + val = val.split('/*')[0].strip() + val = parse_value(val, defines[cur_core]) + # print("Found define for", cur_core) + defines[cur_core][name] = val + + # print("Found", len(cores), "cores for", f) + # print("Found", len(irqs['all']), "shared interrupts for", f) + + if len(cores) == 0: + cores.append("all") + + for core in cores: + if core != "all": + irqs[core].update(irqs['all']) + defines[core].update(defines['all']) + + return { + 'cores': cores, + 'interrupts': irqs, + 'defines': defines, + } + + +def parse_headers(): + os.makedirs('sources/headers_parsed', exist_ok=True) + print('loading headers...') + for f in glob('sources/headers/*.h'): + # if 'stm32f4' not in f: continue + ff = removeprefix(f, 'sources/headers/') + ff = removesuffix(ff, '.h') + + try: + with open('sources/headers_parsed/{}.json'.format(ff), 'r') as j: + res = json.load(j) + except: + print(f) + res = parse_header(f) + with open('sources/headers_parsed/{}.json'.format(ff), 'w') as j: + json.dump(res, j) + + headers_parsed[ff] = res + + +parse_headers() diff --git a/stm32data/util.py b/stm32data/util.py new file mode 100644 index 0000000..51e62c6 --- /dev/null +++ b/stm32data/util.py @@ -0,0 +1,13 @@ + +def removeprefix(value: str, prefix: str) -> str: + if value.startswith(prefix): + return value[len(prefix):] + else: + return value[:] + + +def removesuffix(value: str, suffix: str, /) -> str: + if value.endswith(suffix): + return value[:-len(suffix)] + else: + return value[:] diff --git a/stm32data/yaml.py b/stm32data/yaml.py new file mode 100644 index 0000000..605f256 --- /dev/null +++ b/stm32data/yaml.py @@ -0,0 +1,64 @@ +import yaml +from collections import OrderedDict + +try: + from yaml import CSafeLoader as SafeLoader +except ImportError: + from yaml import SafeLoader + + +class DecimalInt: + def __init__(self, val): + self.val = val + + +def represent_decimal_int(dumper, data): + return dumper.represent_int(data.val) + + +yaml.add_representer(DecimalInt, represent_decimal_int) + + +class HexInt: + def __init__(self, val): + self.val = val + + +def represent_hex_int(dumper, data): + return dumper.represent_int(hex(data.val)) + + +yaml.add_representer(HexInt, represent_hex_int) + + +def represent_ordereddict(dumper, data): + value = [] + + for item_key, item_value in data.items(): + node_key = dumper.represent_data(item_key) + node_value = dumper.represent_data(item_value) + + value.append((node_key, node_value)) + + return yaml.nodes.MappingNode(u'tag:yaml.org,2002:map', value) + + +yaml.add_representer(OrderedDict, represent_ordereddict) + + +def hexint_presenter(dumper, data): + if data > 0x10000: + return dumper.represent_int(hex(data)) + else: + return dumper.represent_int(data) + + 
+yaml.add_representer(int, hexint_presenter) + + +def load(*args, **kwargs): + return yaml.load(*args, Loader=SafeLoader, **kwargs) + + +def dump(*args, **kwargs): + return yaml.dump(*args, **kwargs)
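
Usage sketch (editorial example, not part of this commit): with the code now living in a package with an `__main__.py`, the parser is run as `python -m stm32data` from the repository root instead of `python parse.py` (the relative paths to `header_map.yaml`, `data/` and `sources/` are unchanged). The snippet below shows how the split-out helpers are meant to be consumed; it assumes PyYAML is installed, the `sources/` and `data/` trees are present, and the chip name is only an illustrative guess.

    # Editorial sketch, not part of the patch. Run from the repo root so the
    # relative paths inside stm32data resolve.
    from stm32data import yaml, header      # importing header parses sources/headers/*.h
    from stm32data.yaml import HexInt

    with open('data/memories.yaml') as f:
        mem = yaml.load(f)                   # load() picks CSafeLoader when available

    # dump() goes through the representers registered in stm32data/yaml.py
    # (HexInt, DecimalInt, OrderedDict, hex formatting for large ints).
    print(yaml.dump({'base': HexInt(0x08000000), 'memories': len(mem)}))

    h = header.get_for_chip('STM32H7B3ZITx')  # hypothetical chip name
    if h is not None:
        print(h['cores'], len(h['interrupts']['all']))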