001    /*
002     * To change this template, choose Tools | Templates
003     * and open the template in the editor.
004     */
005    
006    package org.util.xml.parse;
007    
008    import org.util.xml.parse.policy.ParserPolicy;
009    import java.io.BufferedReader;
010    import java.io.File;
011    import java.io.InputStream;
012    import java.io.InputStreamReader;
013    import java.io.Reader;
014    import java.io.UnsupportedEncodingException;
015    import java.io.IOException;
016    import java.net.URI;
017    import java.util.ArrayList;
018    import javax.swing.JOptionPane;
019    import org.util.xml.element.Attributes;
020    import org.util.xml.element.Element;
021    import org.util.xml.element.TagElement;
022    import org.util.xml.element.TextElement;
023    import org.util.xml.parse.policy.*;
024    import org.util.xml.parse.XMLParseException;
025    
026    
027    /**
028     *
029     * @author masaru
030     */
031    public class ElementParser {
032    
033        private Reader reader_;
034        protected static ElementPartParser element_part_parser_;
035        private Element[] result_;
036        private ParserPolicy policy_;
037        private int tab_count_;
038        private String encoding_ = "unknown";
039        private boolean select_encoding_after_readeing_first_line_;
040        private InputStream is_;
041        private ElementParser data_source_;
042        private URI document_base_;
043        private ArrayList<ParserPolicy> policy_stack_ = new ArrayList<ParserPolicy>();
044        
045        protected ElementParser(){}
046        public ElementParser(Reader reader) {
047            init(reader);
048        }
049        public ElementParser(InputStream is) {
050            init(is);
051        }
052        public ElementParser(InputStream is, String encoding) throws UnsupportedEncodingException {
053            init(new InputStreamReader(is,encoding));
054        }
055        public void setDocumentBase(URI document_base) {
056            document_base_ = document_base;
057        }
058        public URI getDocumentBase() {
059            return document_base_;
060        }
061        public Element[] createSubElements(String sub_path) throws Exception {
062            ElementParser sub_parser = createSubParser(sub_path);
063            sub_parser.parse();
064            return sub_parser.getResult();
065            
066        }
067        public ElementParser createSubParser(String sub_path) throws Exception {
068            URI uri = null;
069            if(document_base_ != null)
070                uri = document_base_.resolve(sub_path);
071            else {
072                uri = new File(sub_path).toURI();
073            }
074            ElementParser sub_parser = new ElementParser(uri.toURL().openStream());
075            sub_parser.setDocumentBase(uri.resolve(".."));
076            sub_parser.setPolicy(policy_);
077            return sub_parser;
078        }
079        
080        private void initParsers() {
081            if(policy_==null) {
082                policy_ = new DefaultParserPolicy() {
083                    String encoding_;
084                    public boolean forceEmptyTag(String key) {
085                        return false;
086                       }
087                    public Element allowElement(Element element) {
088                        if(encoding_ == null) {
089                            if(element.isTagElement()){
090                                TagElement te = (TagElement)element;
091                                if(te.isPI())
092                                    encoding_ = te.getAttributeValue("encoding");
093                            }
094                            if(encoding_ == null) encoding_ = "utf-8";
095                        }
096                        return element;
097                    }
098    
099                    public String selectEncoding(String last_tag_key) {
100                        return encoding_;
101                    }
102                };
103            }
104            element_part_parser_ = new ElementPartParser(policy_);
105        }
106        
107        public void setPolicy(ParserPolicy policy) {
108            policy_ = policy;
109            initParsers();
110        }
111    
112        private void init(InputStream is) {
113            initParsers();
114            select_encoding_after_readeing_first_line_ = true;
115            is_ = is;
116        }
117        private void init(Reader reader){
118            initParsers();
119            if(reader instanceof InputStreamReader){
120                encoding_ = ((InputStreamReader)reader).getEncoding();
121                reader_ = new BufferedReader(reader);
122            }else
123                reader_ = reader;
124        }
125        
126        public void error(ParseElement source) {
127            
128            System.err.println("error: ");
129            System.err.println(source);
130            try{
131                for(int i=0;i<1000;i++) {
132                    System.err.print((char)get());
133                }
134            }catch(Exception e) {}
135        }
136        
137    
138    
139        public Element[] parse() throws IOException, XMLParseException {
140            tab_count_ = 0;
141            
142            ArrayList<Element> list = new ArrayList<Element>();
143            
144            if (reader_ != null) {
145                data_source_ = this;
146            }
147            else {
148                data_source_ = new ElementParser() {
149                        public int get() throws IOException {
150                            return is_.read();
151                        }
152                    };
153            }
154            element_part_parser_.error_text_ = new StringBuffer();
155            
156            try {
157                int last = parse(data_source_.get(), list);
158            }
159            catch(XMLParseException e){
160                if(e.getMessage().equals("$cancel"))
161                    System.out.println("parse canelled");
162                else
163                    throw e;
164            }
165            catch(IOException e){
166                if(e.getMessage().equals("$cancel"))
167                    System.out.println("parse canelled");
168                else
169                    throw e;
170            }
171            /*if( last == -1)
172              System.out.println("end of stream.(ok)");
173              else
174              System.out.println("! Not end of stream !");*/
175            
176            result_ = list.toArray(new Element[]{});
177            
178            //for(int i=0;i<list.size();i++)
179            //  System.out.println(list.get(i).toString());
180            return result_;
181        }
182        
183        private int parse(int next, ArrayList<Element> list) throws XMLParseException, IOException {
184            
185            Element element = null;
186            
187            while(next>=0) {
188                
189                next = element_part_parser_.parse(next, data_source_);
190                
191                if(element_part_parser_.is_error_ && policy_.throwExceptionIfDocumentHasError())
192                    throw new XMLParseException(element_part_parser_.error_text_.toString());
193                
194                if(element_part_parser_.isTextElement()) {
195                    TextElement text_element = element_part_parser_.getTextElement();
196                    element = policy_.allowElement(text_element);
197                    
198                } else if(element_part_parser_.isTagElement()){
199                    
200                    TagElement tag_element = element_part_parser_.getTagElement();
201                    //System.out.println(tag_element);
202                    //JOptionPane.showMessageDialog(null, tag_element.getKey());
203                    
204                    if(reader_ == null) {
205                        String encoding = policy_.selectEncoding(tag_element.getKey());
206                        if(encoding != null) {
207                            //System.out.println("set encoding: "+encoding);
208                            encoding_ = encoding;
209                            try {
210                            reader_ = new BufferedReader(new InputStreamReader(is_, encoding));
211                            }
212                            catch(UnsupportedEncodingException exc) {
213                                throw new XMLParseException(exc.toString());
214                            }
215                            data_source_ = this;
216                        }
217                    }
218                    
219                    //System.out.println("\nkey: "+tag_element.getKey());
220                    //System.out.println("att: "+tag_element.getAttributes());
221                    
222                    if(element_part_parser_.isStartTag()){
223                        //JOptionPane.showMessageDialog(null, "start tag:\n");
224                        policy_stack_.add(policy_);
225                        policy_ = policy_.getInnerPolicy(tag_element);
226                        
227                        tab_count_++;
228                        String start_key = tag_element.getKey();
229                        //System.out.println("start tag: "+start_key);
230                        
231                        ArrayList<Element> children = new ArrayList<Element>();
232                        next = parse(next, children);
233                        
234                        String end_key = element_part_parser_.getEndTagName();
235                        if(policy_.checkEndTag())
236                            if(!start_key.equals(end_key) && policy_.throwExceptionIfDocumentHasError()) {
237                                String message = "end tag does not match! (start:"+start_key+" end:"+end_key+")";
238                                if(element_part_parser_.is_error_)
239                                    element_part_parser_.error_text_.append(message);
240                                else
241                                    throw new XMLParseException(message);
242                            }
243                        //                        throw new Exception("parse error: "+end_key+" does not match "+start_key);
244                        
245                        tag_element.setChildren(children.toArray(new Element[]{}));
246                        tab_count_--;
247                        //System.out.println("end children :"+tag_element.getKey());
248                        if(policy_stack_.size()>0)
249                            policy_ = policy_stack_.remove(policy_stack_.size()-1);
250                    }
251                    
252                    //JOptionPane.showMessageDialog(null, "add to list:\n"+tag_element.getKey());
253                    tag_element.setDocumentBase(getDocumentBase());
254                    
255                    element = policy_.allowElement(tag_element);
256                    
257                    //System.out.println("add-----------------------");
258                } else { // end tag
259                    if(!policy_.forceEmptyTag(element_part_parser_.getEndTagName()))
260                        return next;
261                    element = null;
262                }
263                
264                if(element != null) list.add(element);
265                
266                next = element_part_parser_.parse(next, data_source_);
267                
268                if(element_part_parser_.is_error_ && policy_.throwExceptionIfDocumentHasError())
269                    throw new XMLParseException(element_part_parser_.error_text_.toString());
270                
271                if(element_part_parser_.isTextElement()) {
272                    TextElement text_element = element_part_parser_.getTextElement();
273                    element = policy_.allowElement(text_element);
274                    
275                } else if(element_part_parser_.isTagElement()){
276                    
277                    TagElement tag_element = element_part_parser_.getTagElement();
278                    //System.out.println(tag_element);
279                    //JOptionPane.showMessageDialog(null, tag_element.getKey());
280                    
281                    if(reader_ == null) {
282                        String encoding = policy_.selectEncoding(tag_element.getKey());
283                        if(encoding != null) {
284                            //System.out.println("set encoding: "+encoding);
285                            encoding_ = encoding;
286                            try {
287                                reader_ = new BufferedReader(new InputStreamReader(is_, encoding));
288                            }
289                            catch(UnsupportedEncodingException exc) {
290                                throw new XMLParseException(exc.toString());
291                            }
292                            data_source_ = this;
293                        }
294                    }
295                    
296                    //System.out.println("\nkey: "+tag_element.getKey());
297                    //System.out.println("att: "+tag_element.getAttributes());
298                    
299                    if(element_part_parser_.isStartTag()){
300                        //JOptionPane.showMessageDialog(null, "start tag:\n");
301                        policy_stack_.add(policy_);
302                        policy_ = policy_.getInnerPolicy(tag_element);
303                        if(policy_.finished())
304                            throw new XMLParseException("$cancel"); 
305                
306                        tab_count_++;
307                        String start_key = tag_element.getKey();
308                        //System.out.println("start tag: "+start_key);
309                        
310                        ArrayList<Element> children = new ArrayList<Element>();
311                        next = parse(next, children);
312                        
313                        String end_key = element_part_parser_.getEndTagName();
314                        if(policy_.checkEndTag())
315                            if(!start_key.equals(end_key) && policy_.throwExceptionIfDocumentHasError()) {
316                                String message = "end tag does not match! (start:"+start_key+" end:"+end_key+")";
317                                if(element_part_parser_.is_error_)
318                                    element_part_parser_.error_text_.append(message);
319                                else
320                                    throw new XMLParseException(message);
321                            }
322                        //                        throw new Exception("parse error: "+end_key+" does not match "+start_key);
323                        
324                        tag_element.setChildren(children.toArray(new Element[]{}));
325                        tab_count_--;
326                        //System.out.println("end children :"+tag_element.getKey());
327                        if(policy_stack_.size()>0)
328                            policy_ = policy_stack_.remove(policy_stack_.size()-1);
329    
330                        if(policy_.finished())
331                            throw new XMLParseException("$cancel");
332                    }
333                    
334                    //JOptionPane.showMessageDialog(null, "add to list:\n"+tag_element.getKey());
335                    tag_element.setDocumentBase(getDocumentBase());
336                    
337                    element = policy_.allowElement(tag_element);
338                    
339                    //System.out.println("add-----------------------");
340                } else { // end tag
341                    if(!policy_.forceEmptyTag(element_part_parser_.getEndTagName()))
342                        return next;
343                    element = null;
344                }
345                
346                if(element != null) list.add(element);
347                
348                if(policy_.finished())
349                    throw new XMLParseException("$cancel");
350            }
351            return -1;
352        }
353        
354    
355        public int escape (String message) throws XMLParseException, IOException {
356            int next = -1;
357            try { throw new Exception("mark");}catch(Exception e){e.printStackTrace();}
358            System.err.println("this documents has error: "+message);
359            System.err.println("skip---------------------");
360            int c = get();
361            System.err.print((char)c);
362            while(c!='>' && c!=-1) System.err.print((char)(c=get()));
363            //        for(int i=0;i<3000;i++) System.err.print((char)(c=parser.get()));
364            System.err.println("\n-------------------------");
365            return get();
366        }
367        
368        public Element[] getResult() {
369            return result_;
370        }
371        public TagElement getFirstPlainTagElement() {
372            for(Element tmp : result_)
373                if(tmp.isTagElement()) {
374                    TagElement tag = (TagElement)tmp;
375                    if(!tag.isPI())
376                        return tag;
377                }
378            return null;
379        }
380        
381        public String getEncoding() {
382            return encoding_;
383        }
384        
385        int counter = 0;
386        long start = System.currentTimeMillis();
387        public int get() throws IOException {
388            return reader_.read();
389    
390    //        int val = reader_.read();
391    //        counter++;
392    //System.out.print("["+(char)val+"]");
393    //        return val;
394        }
395        public char getChar() throws IOException {
396            int b = get();
397            if(b==-1) throw new IOException("end of stream.");
398            return (char)b;
399        }
400    }