001    /*
002     * To change this template, choose Tools | Templates
003     * and open the template in the editor.
004     */
005    
006    package org.util.xml.parse;
007    
008    import java.io.PrintWriter;
009    import java.io.StringWriter;
010    import java.io.IOException;
011    import org.util.xml.parse.policy.ParserPolicy;
012    import org.util.xml.element.Attribute;
013    import org.util.xml.element.TagElement;
014    import org.util.xml.element.TextElement;
015    
016    /**
017     *
018     * @author masaru
019     */
020    public class ElementPartParser extends ParseElement {
021    
022        // parsers
023        protected NameParser name_parser_;
024        protected SpaceParser space_parser_;
025        protected AttributeParser attribute_parser_;
026        protected TextElementParser text_element_parser_;
027        protected CommentInnerTagElementParser comment_inner_tag_element_parser_;
028    
029        protected ParserPolicy policy_;
030        
031        protected boolean is_novalue_occuered_;
032        protected boolean is_error_;
033        
034        protected TagElement tag_element_;
035        protected TextElement text_element_;
036        protected String endtag_name_;
037        protected boolean is_start_tag_;
038        protected StringBuffer error_text_;
039        
040        public ElementPartParser() {
041            this(null);
042        }
043        
044        public ElementPartParser(ParserPolicy policy) {
045            policy_ = policy;
046    
047            name_parser_ = new NameParser();
048            space_parser_ = new SpaceParser();
049            attribute_parser_ = new AttributeParser();
050            text_element_parser_ = new TextElementParser();
051            comment_inner_tag_element_parser_ = new CommentInnerTagElementParser();
052            error_text_ = new StringBuffer();
053        }
054        
055        @Override
056        public boolean match(char c) {
057            return c=='<';
058        }
059    
060        @Override
061            public int parse(int c, ElementParser parser) throws XMLParseException, IOException {
062    
063            is_error_ = false;
064            is_start_tag_ = false;
065            endtag_name_ = null;
066            text_element_ = null;
067            tag_element_ = null;
068            is_novalue_occuered_ = false;
069            
070            int next_word_ = -1;
071            int state = 0;
072    try{
073        
074            while(true) {
075                
076                //System.out.println("state:"+state+"|"+(char)c);
077                if(state == 0) {
078                    if(c=='<') state = 2;
079                    else if(isSpace(c)) ;
080                    else if(text_element_parser_.match((char)c)){
081                        c = text_element_parser_.parse(c, parser);
082                        text_element_ = new TextElement(text_element_parser_.getReturnValue());
083                        break;
084                    } else escape(parser, "parse error: cannot read tag: state=0 ???");
085                }else if(state == 1) {
086                    if(c=='<') state = 2;
087                    else throw new XMLParseException("parse error: cannot read tag: state=1 ???");
088                }else if(state == 2) {
089                    if((c=='/')) state = 6;
090                    else if(c=='?') {
091                        state = 9;
092                    } else if(comment_inner_tag_element_parser_.match((char)c)) {
093                        c = comment_inner_tag_element_parser_.parse(c, parser);
094                        text_element_ = comment_inner_tag_element_parser_.getResult();
095                        break;
096                    } else if(name_parser_.match((char)c)) {
097                        c = name_parser_.parse(c, parser);
098                        String key = name_parser_.getReturnValue();
099                        tag_element_ = new TagElement(key);
100                        if(policy_ != null && policy_.forceEmptyTag(key))
101                            tag_element_.setEmpty(true);
102                        else
103                            is_start_tag_ = true;
104                        state = 10;
105                        continue;
106                    } else throw new XMLParseException("parse error: cannot read tag: this charactar is not allowed at start of tag ("+(char)c+")");
107                }else if(state == 3) {
108                    next_word_ = c;
109                    break;
110                } else if(state == 4) {
111                    tag_element_.setEmpty(true);
112                    is_start_tag_ = false;
113                    if(c=='>') break;
114                    else throw new XMLParseException("parse error: cannot read tag: [<.../"+(char)c+"]");
115                }else if(state == 6) {
116                    c = name_parser_.parse(c, parser);
117                    endtag_name_ = name_parser_.getReturnValue();
118                    state = 7;
119                    continue;
120                }else if(state == 7) {
121                    if(c=='>') {
122                        break;
123                    } else if(space_parser_.match((char)c)) {
124                        c = space_parser_.parse(c, parser);
125                        continue;
126                    } else throw new XMLParseException("parse error: cannot read tag: state=7 cannot find '>'");
127                }else if(state == 9) {
128                    c = name_parser_.parse(c, parser);
129                    tag_element_ = new TagElement(name_parser_.getReturnValue());
130                    tag_element_.setPI(true);
131                    state = 10;
132                    continue;
133                }else if(state == 10) {
134                    if(space_parser_.match((char)c)) {
135                        c = space_parser_.parse(c, parser);
136                        state = 11;
137                        continue;
138                    } else {
139                        if(tag_element_.isPI()){
140                            if(c=='?') state = 4;
141                            else return escape(parser,"in <? ... ?> tag");
142                        } else {
143                            if(c=='>') break;
144                            else if(c=='/') state = 4;
145                            else if(is_novalue_occuered_) {
146                                c = attribute_parser_.parse(c, parser);
147                                Attribute attribute = attribute_parser_.getAttribute();
148                                is_novalue_occuered_ = attribute.isNovalue();
149                                tag_element_.addAttribute(attribute);
150                                state = 10;
151                                continue;
152                            } else return escape(parser,"cannot read "+(char)c);
153                        }
154                    }
155                }else if(state == 11) {
156                    if(attribute_parser_.match((char)c)) {
157                        c = attribute_parser_.parse(c, parser);
158                        Attribute attribute = attribute_parser_.getAttribute();
159                        is_novalue_occuered_ = attribute.isNovalue();
160                        tag_element_.addAttribute(attribute);
161                        state = 10;
162                        continue;
163                    } else {
164                        if(tag_element_.isPI()){
165                            if(c=='?') state = 4;
166                            else throw new XMLParseException("parse error: cannot read tag: state=11");
167                        } else {
168                            if(c=='>') break;
169                            else if(c=='/') state = 4;
170                            else throw new XMLParseException("parse error: cannot read tag: state=11");
171                        }
172                    }
173                }
174                if(state==0)
175                    c = parser.get();
176                else
177                    c = parser.getChar();
178            }
179            
180    } catch(IOException e) {
181        is_error_ = true;
182        StringWriter sw = new StringWriter();
183        e.printStackTrace(new PrintWriter(sw));
184        if(policy_.throwExceptionIfDocumentHasError())
185            error_text_.append(sw.toString());
186        parser.escape(e.getMessage());
187    } catch(XMLParseException e) {
188        is_error_ = true;
189        StringWriter sw = new StringWriter();
190        e.printStackTrace(new PrintWriter(sw));
191        if(policy_.throwExceptionIfDocumentHasError())
192            error_text_.append(sw.toString());
193        parser.escape(e.getMessage());
194    }
195            
196            int result = -1;
197            try {
198                if(text_element_!=null)
199                    result = c;
200                else
201                    result = parser.get();
202            }
203            catch (IOException e) {
204                throw new XMLParseException(e.toString());
205            }
206            return result;
207        }
208        
209        public int escape (ElementParser parser,String message) throws XMLParseException, IOException {
210            //try{throw new Exception("mark");}catch(Exception e){e.printStackTrace();}
211            is_error_ = true;
212            System.err.println("this documents has error: "+message);
213            System.err.println("skip---------------------");
214            int c = parser.get();
215            System.err.print((char)c);
216            while(c!='>' && c!=-1) System.err.print((char)(c=parser.get()));
217            //        for(int i=0;i<3000;i++) System.err.print((char)(c=parser.get()));
218            System.err.println("\n-------------------------");
219            return parser.get();
220        }
221    
222        public boolean isTagElement() {
223            return (tag_element_!=null);
224        }
225        public boolean isTextElement() {
226            return (text_element_!=null);
227        }
228        public boolean isStartTag() {
229            return is_start_tag_;
230        }
231        public TextElement getTextElement() {
232            return text_element_;
233        }
234        public TagElement getTagElement() {
235            return tag_element_;
236        }
237        public String getEndTagName() {
238            return endtag_name_;
239        }
240    }