1
2
3
4
5
6
7 from genshi import XML
8 from genshi.core import Stream
9
# Short aliases for the genshi stream event kinds; the language filter in this
# module compares each event's ``kind`` against them.
START, END = Stream.START, Stream.END
# Name of the ``lang`` attribute in the XML namespace, written as
# ``{namespace-uri}local-name``.
# NOTE(review): presumably read by get_elem_lang (not visible here) -- confirm.
XML_LANG = u'{http://www.w3.org/XML/1998/namespace}lang'
# Element name whose START and END events the language filter drops from its
# output.
RDF_WRAPPER = u'rdf-wrapper'
13
15 if isinstance(valueOrList, unicode):
16 return langXML(valueOrList, lang, fragment, encoding)
17 else:
18
19 events = []
20 for value in valueOrList:
21 if value:
22 events.extend( langXML(value, lang, fragment, encoding).events )
23 return Stream(events)
24
def langXML(text, lang, fragment=True, encoding=None):
    """Parse ``text`` with ``XML`` and pipe the result through a language filter.

    When ``fragment`` is true and ``text`` does not begin with an XML
    declaration or a DOCTYPE, the text is wrapped in a temporary ``<xml>``
    element before parsing and the parsed stream is additionally piped
    through ``skip_outer``.

    ``text`` is the markup to parse and ``lang`` is handed to
    ``filter_language`` to build the language filter.  ``encoding`` is
    accepted but never consulted: unicode input is always encoded as UTF-16
    and ``XML`` is called with the text alone.

    Returns the parsed stream piped through the filters described above.
    """
    # Text that opens with its own prolog is never wrapped.
    has_prolog = text.startswith(('<?xml ', '<!DOCTYPE '))
    wrapped = bool(fragment) and not has_prolog

    if wrapped:
        text = '<xml>%s</xml>' % text
    if isinstance(text, unicode):
        # Unicode text is handed to the parser as UTF-16 encoded bytes.
        text = text.encode('utf-16')

    parsed = XML(text)
    by_language = filter_language(lang)
    if wrapped:
        return parsed | skip_outer | by_language
    return parsed | by_language
39
41 """A filter that drops the first and the last event of the stream."""
42 istream = iter(stream)
43 istream.next()
44 last = None
45 for content in istream:
46 if last: yield last
47 last = content
48
def filter_lang(stream):
    """Yield the events of ``stream`` that pass the language check.

    ``lang`` is not a parameter; it is read from the enclosing scope.  When
    it is set and ``get_elem_lang`` reports a different, non-empty language
    for a START event, that event and everything up to and including its
    balancing END event are dropped.  START and END events for
    ``RDF_WRAPPER`` are dropped as well; every other event is yielded as a
    ``(kind, data, pos)`` tuple.
    """
    skipping = False
    nesting = 0
    for kind, data, pos in stream:
        # Languages are only looked up on START events, and only when a
        # target language has been given.
        if kind == START and lang:
            found = get_elem_lang(data)
            if found and found != lang:
                skipping = True

        if skipping:
            # Count nesting so that skipping stops at the END event which
            # balances the START that triggered it (that END is skipped too).
            if kind == START:
                nesting += 1
            elif kind == END:
                nesting -= 1
                if nesting == 0:
                    skipping = False
            continue

        # START data is indexed for its name; END data is compared directly.
        if kind == START:
            is_wrapper = data[0] == RDF_WRAPPER
        elif kind == END:
            is_wrapper = data == RDF_WRAPPER
        else:
            is_wrapper = False
        if not is_wrapper:
            yield kind, data, pos
74
75 return filter_lang
76
84