XML ElementTree

Published by onesixx on

https://towardsdatascience.com/processing-xml-in-python-elementtree-c8992941efd2 : Processing XML in Python — ElementTree by Deepesh Nair

# XML (eXtensible Markup Language)
# elements
#  text 

import xml.etree.ElementTree as ET

### Parsing
def _describe(elem):
    """Return the tag / attrib / text summary line for one element."""
    return f'tag=>{elem.tag}  attrib=>{elem.attrib} text=>{elem.text}'


# Load the document once; `tree` and `root` are reused by the later sections.
tree = ET.parse('./movie.xml')
root = tree.getroot()
print(_describe(root))

### parse with loop
# Iterating an element yields its direct children only.
for top_level in root:
    print(_describe(top_level))

# root.iter() walks the whole tree, starting with root itself.
# The three lists are built and then discarded, as in an interactive session.
[node.tag for node in root.iter()]
[node.attrib for node in root.iter()]
[node.text for node in root.iter()]

# Passing a tag name restricts the walk to elements with that tag.
for rated in root.iter('rating'):
    print(_describe(rated))

### Scan with XPath
# ElementTree supports only a subset of XPath.  A predicate is written
# directly after the tag it filters (movie[...]), and the parent step is "..".

#- XPath with a child-text predicate: movies whose <year> child is '2000'
for movie in root.findall("./genre/decade/movie[year='2000']"):
    print(f'tag=>{movie.tag}  attrib=>{movie.attrib} text=>{movie.text}')

#- XPath with an attribute predicate: <format> elements with multiple='Yes'
for fmt in root.findall("./genre/decade/movie/format[@multiple='Yes']"):
    print(f'tag=>{fmt.tag}  attrib=>{fmt.attrib} text=>{fmt.text}')

#- ".." steps up to the parent element: the movies owning such a <format>
for movie in root.findall("./genre/decade/movie/format[@multiple='Yes']/.."):
    print(f'tag=>{movie.tag}  attrib=>{movie.attrib} text=>{movie.text}')

#### Modify and Save
# Locate one movie by its title attribute and rename it.
b2tf = root.find("./genre/decade/movie[@title='Back 2 the Future']")
print(b2tf)
# find() returns None when nothing matches; only rename a real element.
if b2tf is not None:
    b2tf.set("title", 'Back Back Back')
tree.write("movies.xml")

# search with regex
import re

# A comma in the <format> text means the movie is listed in several formats.
# An empty element has text None, so fall back to '' before searching.
for form in root.findall("./genre/decade/movie/format"):
    print(re.search(',', form.text or ''))

# add (modify) attribute with set()
# XPath attribute predicates such as [@multiple='Yes'] are case-sensitive,
# so the value is written with the same capitalisation for both outcomes.
for form in root.findall("./genre/decade/movie/format"):
    has_comma = re.search(',', form.text or '') is not None
    form.set('multiple', 'Yes' if has_comma else 'No')

# Show the resulting attribute next to the text it was derived from.
for form in root.findall("./genre/decade/movie/format"):
    print(form.attrib, form.text)

# find wrong data
# Print each decade's attributes followed by the years of its movies, so a
# movie filed under the wrong decade stands out.
for decade in root.findall("./genre/decade"):
    print(decade.attrib)
    for year in decade.findall("./movie/year"):
        print(year.text)

# List the movies whose <year> is '2000' (predicate placed directly on the tag).
for movie in root.findall("./genre/decade/movie[year='2000']"):
    print(movie.attrib)

# Add a new <decade years="2000s"> under the Action genre.
action = root.find("./genre[@category='Action']")
new_dec = ET.SubElement(action, 'decade')
new_dec.set("years", '2000s')

# Move X-Men: attach it to the 2000s decade, then detach it from the 1990s.
xmen = root.find("./genre/decade/movie[@title='X-Men']")
dec2000s = root.find("./genre[@category='Action']/decade[@years='2000s']")
dec2000s.append(xmen)
dec1990s = root.find("./genre[@category='Action']/decade[@years='1990s']")
dec1990s.remove(xmen)

# build the new xml
# Write the modified tree, read it back, and print the serialised result.
tree.write("movies.xml")
tree = ET.parse('movies.xml')
root = tree.getroot()
print(ET.tostring(root, encoding='utf8').decode('utf8'))

    
        
            
                DVD
                1981
                PG
                
                'Archaeologist and adventurer Indiana Jones 
                is hired by the U.S. government to find the Ark of  the Covenant before the Nazis.'
                
            
               
               DVD,Online
               1984
               PG
               None provided.
            
            
               Blu-ray
               1985
               PG
               Marty McFly
            
        
        
            
               dvd, digital
               2000
               PG-13
               Two mutants come to a private academy for their kind whose resident superhero team must oppose a terrorist organization with similar powers.
            
            
               VHS
               1992
               PG13
               NA.
            
               
               Online
               1992
               R
               WhAtEvER I Want!!!?!
            
            
    

    
        
            
                DVD
                1979
                R
                """""""""
            
        
        
            
                DVD
                1986
                PG13
                Funny movie on funny guy 
            
            
                blue-ray
                2000
                Unrated
                psychopathic Bateman
            
        
    

BeautifulSoup .prettify()

BeautifulSoup may be the simplest solution for Python <=3.9.

https://stackoverflow.com/questions/749796/pretty-printing-xml-in-python

import os

# open() does not expand "~", so resolve the home directory explicitly.
xmlFile = os.path.expanduser('~/mmdetection/oxford/annotations/xmls/Abyssinian_160.xml')


from bs4 import BeautifulSoup

# Parse inside a `with` block so the file handle is closed once the document
# has been read; 'xml' selects BeautifulSoup's XML parser.
with open(xmlFile) as xml_handle:
    bs = BeautifulSoup(xml_handle, 'xml')
pretty_xml = bs.prettify()
print(pretty_xml)

lxml

Prettier output, but it requires extra arguments (pretty_print=True, encoding=str).

import os

from lxml import etree

# A leading "~" in the path is not expanded automatically; expanduser() is a
# no-op for a path that is already absolute.
x = etree.parse(os.path.expanduser(xmlFile))
# encoding=str makes tostring() return text rather than bytes.
pretty_xml = etree.tostring(x, pretty_print=True, encoding=str)
print(pretty_xml)
Categories: vision

onesixx

Blog Owner

Subscribe
Notify of
guest

0 Comments
Oldest
Newest Most Voted
Inline Feedbacks
View all comments
0
Would love your thoughts, please comment.x
()
x