Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ jobs:
python-version: '3.11'
- os: ubuntu-latest
python-version: '3.12'
- os: ubuntu-latest
python-version: '3.13'

runs-on: ${{ matrix.os }}
timeout-minutes: 10
Expand Down
15 changes: 8 additions & 7 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ classifiers = [
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"License :: OSI Approved :: Python Software Foundation License",
"Intended Audience :: Developers",
"Topic :: Text Processing :: Markup :: HTML",
Expand All @@ -37,11 +38,11 @@ build-backend = "setuptools.build_meta"
write_to = "src/emeraldtree/_version.py"

[tool.tox]
legacy_tox_ini = """
[tox]
envlist = py{39,310,311,312}
requires = ["tox>=4"]
env_list = ["py39", "py310", "py311", "py312", "py313"]

[testenv]
deps = pytest
commands = pytest -rs --pyargs {posargs:emeraldtree}
"""
[tool.tox.env.py]
deps = ["pytest"]
commands = [
["pytest", "-rs", "--pyargs", "{posargs:emeraldtree}"]
]
25 changes: 20 additions & 5 deletions src/emeraldtree/ElementPath.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,15 +102,30 @@ def select(context, result):

def prepare_dot_dot(next, token):
def select(context, result):
parent_map = context.parent_map
if parent_map is None:
context.parent_map = parent_map = {}
# Prefer direct parent pointer if available. This allows '..' from any node.
for elem in result:
parent = getattr(elem, '_parent', None)
if parent is not None:
yield parent
# Fallback: if no direct parents were found (e.g., foreign objects), try building a local parent map
# from the current context root, but only once.
# Note: This fallback cannot find parents outside the context subtree.
if context.parent_map is None:
parent_map = {}
# Build mapping only for Element children to avoid iterating over strings
for p in context.root.iter():
# only consider Elements as parents
try:
iterator = iter(p)
except TypeError:
continue
for e in p:
parent_map[e] = p
context.parent_map = parent_map
# Yield parents for any remaining items not covered above
for elem in result:
if elem in parent_map:
yield parent_map[elem]
if elem in context.parent_map:
yield context.parent_map[elem]
return select

def prepare_predicate(next, token):
Expand Down
7 changes: 4 additions & 3 deletions src/emeraldtree/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,10 @@ def handle_starttag(self, tag, attrs):
elif k == "content":
content = v
if http_equiv == "content-type" and content:
import cgi
_, params = cgi.parse_header(content)
encoding = params.get('charset')
from email.message import Message
msg = Message()
msg['content-type'] = content
encoding = msg.get_param('charset', header='content-type')
if encoding:
self.encoding = encoding
if tag.name in self.AUTOCLOSE:
Expand Down
3 changes: 0 additions & 3 deletions src/emeraldtree/tests/test_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,6 @@ def test_Element_findall_bracketed_tag():
assert result[0] is b1 # b1 has 'c' childs

def test_Element_findall_dotdot():
pytest.skip('broken')
c1 = Element('c')
c2 = Element('c')
text = "text"
Expand All @@ -184,7 +183,6 @@ def test_Element_findall_dotdot():
assert result[1] is c2

def test_Element_findall_slashslash():
pytest.skip('broken')
c1 = Element('c')
c2 = Element('c')
text = "text"
Expand All @@ -199,7 +197,6 @@ def test_Element_findall_slashslash():
assert result[1] is c2

def test_Element_findall_dotslashslash():
pytest.skip('broken')
c1 = Element('c')
c2 = Element('c')
text = "text"
Expand Down
64 changes: 62 additions & 2 deletions src/emeraldtree/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,9 @@ class Element(Node):

attrib = None

# Parent pointer (internal). None for root or detached elements.
_parent = None

##
# (Attribute) Text before first subelement. This is either a
# string or the value None, if there was no text.
Expand Down Expand Up @@ -151,6 +154,10 @@ def __init__(self, tag, attrib=None, children=(), **extra):
self.tag = tag
self.attrib = attrib
self._children = list(children)
# set parent pointers for element children
for ch in self._children:
if isinstance(ch, Element):
ch._parent = self

def __repr__(self):
return "<Element {} at {:x}>".format(repr(self.tag), id(self))
Expand Down Expand Up @@ -186,7 +193,34 @@ def __getitem__(self, index):
# @exception AssertionError If element is not a valid object.

def __setitem__(self, index, element):
self._children.__setitem__(index, element)
# clear parent of replaced children and set parent of new ones
if isinstance(index, slice):
# clear parents for removed elements
old_items = self._children[index]
for old in old_items:
if isinstance(old, Element):
old._parent = None
# assign
self._children[index] = element
# set parents for new elements
try:
iterator = iter(element)
except TypeError:
iterator = None
if iterator is not None:
for new in element:
if isinstance(new, Element):
new._parent = self
else:
try:
old = self._children[index]
except Exception:
old = None
if isinstance(old, Element):
old._parent = None
self._children[index] = element
if isinstance(element, Element):
element._parent = self

##
# Deletes the given subelement.
Expand All @@ -195,6 +229,19 @@ def __setitem__(self, index, element):
# @exception IndexError If the given element does not exist.

def __delitem__(self, index):
# clear parent pointer for removed element(s)
if isinstance(index, slice):
old_items = self._children[index]
for old in old_items:
if isinstance(old, Element):
old._parent = None
else:
try:
old = self._children[index]
except Exception:
old = None
if isinstance(old, Element):
old._parent = None
self._children.__delitem__(index)

##
Expand All @@ -205,6 +252,8 @@ def __delitem__(self, index):

def append(self, element):
self._children.append(element)
if isinstance(element, Element):
element._parent = self

##
# Appends subelements from a sequence.
Expand All @@ -215,6 +264,9 @@ def append(self, element):

def extend(self, elements):
self._children.extend(elements)
for e in elements:
if isinstance(e, Element):
e._parent = self

##
# Inserts a subelement at the given position in this element.
Expand All @@ -224,6 +276,8 @@ def extend(self, elements):

def insert(self, index, element):
self._children.insert(index, element)
if isinstance(element, Element):
element._parent = self

##
# Removes a matching subelement. Unlike the <b>find</b> methods,
Expand All @@ -236,11 +290,16 @@ def insert(self, index, element):

def remove(self, element):
self._children.remove(element)
if isinstance(element, Element):
element._parent = None

##
# Removes all subelements.

def remove_all(self):
for ch in self._children:
if isinstance(ch, Element):
ch._parent = None
self._children = []

##
Expand Down Expand Up @@ -355,7 +414,8 @@ def iter(self, tag=None):
for e in e.iter(tag):
yield e
else:
yield e
if tag is None:
yield e

##
# Creates a text iterator. The iterator loops over this element
Expand Down