diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 12fe688..2832af5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,6 +23,8 @@ jobs: python-version: '3.11' - os: ubuntu-latest python-version: '3.12' + - os: ubuntu-latest + python-version: '3.13' runs-on: ${{ matrix.os }} timeout-minutes: 10 diff --git a/pyproject.toml b/pyproject.toml index e20114c..355cdcc 100755 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ classifiers = [ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "License :: OSI Approved :: Python Software Foundation License", "Intended Audience :: Developers", "Topic :: Text Processing :: Markup :: HTML", @@ -37,11 +38,11 @@ build-backend = "setuptools.build_meta" write_to = "src/emeraldtree/_version.py" [tool.tox] -legacy_tox_ini = """ -[tox] -envlist = py{39,310,311,312} +requires = ["tox>=4"] +env_list = ["py39", "py310", "py311", "py312", "py313"] -[testenv] -deps = pytest -commands = pytest -rs --pyargs {posargs:emeraldtree} -""" +[tool.tox.env.py] +deps = ["pytest"] +commands = [ + ["pytest", "-rs", "--pyargs", "{posargs:emeraldtree}"] +] diff --git a/src/emeraldtree/ElementPath.py b/src/emeraldtree/ElementPath.py index 55b9952..3706df2 100644 --- a/src/emeraldtree/ElementPath.py +++ b/src/emeraldtree/ElementPath.py @@ -102,15 +102,30 @@ def select(context, result): def prepare_dot_dot(next, token): def select(context, result): - parent_map = context.parent_map - if parent_map is None: - context.parent_map = parent_map = {} + # Prefer direct parent pointer if available. This allows '..' from any node. + for elem in result: + parent = getattr(elem, '_parent', None) + if parent is not None: + yield parent + # Fallback: if no direct parents were found (e.g., foreign objects), try building a local parent map + # from the current context root, but only once. + # Note: This fallback cannot find parents outside the context subtree. + if context.parent_map is None: + parent_map = {} + # Build mapping only for Element children to avoid iterating over strings for p in context.root.iter(): + # only consider Elements as parents + try: + iterator = iter(p) + except TypeError: + continue for e in p: parent_map[e] = p + context.parent_map = parent_map + # Yield parents for any remaining items not covered above for elem in result: - if elem in parent_map: - yield parent_map[elem] + if elem in context.parent_map: + yield context.parent_map[elem] return select def prepare_predicate(next, token): diff --git a/src/emeraldtree/html.py b/src/emeraldtree/html.py index 2d94742..21dd91e 100644 --- a/src/emeraldtree/html.py +++ b/src/emeraldtree/html.py @@ -93,9 +93,10 @@ def handle_starttag(self, tag, attrs): elif k == "content": content = v if http_equiv == "content-type" and content: - import cgi - _, params = cgi.parse_header(content) - encoding = params.get('charset') + from email.message import Message + msg = Message() + msg['content-type'] = content + encoding = msg.get_param('charset', header='content-type') if encoding: self.encoding = encoding if tag.name in self.AUTOCLOSE: diff --git a/src/emeraldtree/tests/test_tree.py b/src/emeraldtree/tests/test_tree.py index 5aff12e..01d436e 100644 --- a/src/emeraldtree/tests/test_tree.py +++ b/src/emeraldtree/tests/test_tree.py @@ -170,7 +170,6 @@ def test_Element_findall_bracketed_tag(): assert result[0] is b1 # b1 has 'c' childs def test_Element_findall_dotdot(): - pytest.skip('broken') c1 = Element('c') c2 = Element('c') text = "text" @@ -184,7 +183,6 @@ def test_Element_findall_dotdot(): assert result[1] is c2 def test_Element_findall_slashslash(): - pytest.skip('broken') c1 = Element('c') c2 = Element('c') text = "text" @@ -199,7 +197,6 @@ def test_Element_findall_slashslash(): assert result[1] is c2 def test_Element_findall_dotslashslash(): - pytest.skip('broken') c1 = Element('c') c2 = Element('c') text = "text" diff --git a/src/emeraldtree/tree.py b/src/emeraldtree/tree.py index 817566b..405bc64 100644 --- a/src/emeraldtree/tree.py +++ b/src/emeraldtree/tree.py @@ -121,6 +121,9 @@ class Element(Node): attrib = None + # Parent pointer (internal). None for root or detached elements. + _parent = None + ## # (Attribute) Text before first subelement. This is either a # string or the value None, if there was no text. @@ -151,6 +154,10 @@ def __init__(self, tag, attrib=None, children=(), **extra): self.tag = tag self.attrib = attrib self._children = list(children) + # set parent pointers for element children + for ch in self._children: + if isinstance(ch, Element): + ch._parent = self def __repr__(self): return "".format(repr(self.tag), id(self)) @@ -186,7 +193,34 @@ def __getitem__(self, index): # @exception AssertionError If element is not a valid object. def __setitem__(self, index, element): - self._children.__setitem__(index, element) + # clear parent of replaced children and set parent of new ones + if isinstance(index, slice): + # clear parents for removed elements + old_items = self._children[index] + for old in old_items: + if isinstance(old, Element): + old._parent = None + # assign + self._children[index] = element + # set parents for new elements + try: + iterator = iter(element) + except TypeError: + iterator = None + if iterator is not None: + for new in element: + if isinstance(new, Element): + new._parent = self + else: + try: + old = self._children[index] + except Exception: + old = None + if isinstance(old, Element): + old._parent = None + self._children[index] = element + if isinstance(element, Element): + element._parent = self ## # Deletes the given subelement. @@ -195,6 +229,19 @@ def __setitem__(self, index, element): # @exception IndexError If the given element does not exist. def __delitem__(self, index): + # clear parent pointer for removed element(s) + if isinstance(index, slice): + old_items = self._children[index] + for old in old_items: + if isinstance(old, Element): + old._parent = None + else: + try: + old = self._children[index] + except Exception: + old = None + if isinstance(old, Element): + old._parent = None self._children.__delitem__(index) ## @@ -205,6 +252,8 @@ def __delitem__(self, index): def append(self, element): self._children.append(element) + if isinstance(element, Element): + element._parent = self ## # Appends subelements from a sequence. @@ -215,6 +264,9 @@ def append(self, element): def extend(self, elements): self._children.extend(elements) + for e in elements: + if isinstance(e, Element): + e._parent = self ## # Inserts a subelement at the given position in this element. @@ -224,6 +276,8 @@ def extend(self, elements): def insert(self, index, element): self._children.insert(index, element) + if isinstance(element, Element): + element._parent = self ## # Removes a matching subelement. Unlike the find methods, @@ -236,11 +290,16 @@ def insert(self, index, element): def remove(self, element): self._children.remove(element) + if isinstance(element, Element): + element._parent = None ## # Removes all subelements. def remove_all(self): + for ch in self._children: + if isinstance(ch, Element): + ch._parent = None self._children = [] ## @@ -355,7 +414,8 @@ def iter(self, tag=None): for e in e.iter(tag): yield e else: - yield e + if tag is None: + yield e ## # Creates a text iterator. The iterator loops over this element