001 /* 002 * To change this template, choose Tools | Templates 003 * and open the template in the editor. 004 */ 005 006 package org.util.xml.parse; 007 008 import org.util.xml.parse.policy.ParserPolicy; 009 import java.io.BufferedReader; 010 import java.io.File; 011 import java.io.InputStream; 012 import java.io.InputStreamReader; 013 import java.io.Reader; 014 import java.io.UnsupportedEncodingException; 015 import java.io.IOException; 016 import java.net.URI; 017 import java.util.ArrayList; 018 import javax.swing.JOptionPane; 019 import org.util.xml.element.Attributes; 020 import org.util.xml.element.Element; 021 import org.util.xml.element.TagElement; 022 import org.util.xml.element.TextElement; 023 import org.util.xml.parse.policy.*; 024 import org.util.xml.parse.XMLParseException; 025 026 027 /** 028 * 029 * @author masaru 030 */ 031 public class ElementParser { 032 033 private Reader reader_; 034 protected static ElementPartParser element_part_parser_; 035 private Element[] result_; 036 private ParserPolicy policy_; 037 private int tab_count_; 038 private String encoding_ = "unknown"; 039 private boolean select_encoding_after_readeing_first_line_; 040 private InputStream is_; 041 private ElementParser data_source_; 042 private URI document_base_; 043 private ArrayList<ParserPolicy> policy_stack_ = new ArrayList<ParserPolicy>(); 044 045 protected ElementParser(){} 046 public ElementParser(Reader reader) { 047 init(reader); 048 } 049 public ElementParser(InputStream is) { 050 init(is); 051 } 052 public ElementParser(InputStream is, String encoding) throws UnsupportedEncodingException { 053 init(new InputStreamReader(is,encoding)); 054 } 055 public void setDocumentBase(URI document_base) { 056 document_base_ = document_base; 057 } 058 public URI getDocumentBase() { 059 return document_base_; 060 } 061 public Element[] createSubElements(String sub_path) throws Exception { 062 ElementParser sub_parser = createSubParser(sub_path); 063 sub_parser.parse(); 064 return sub_parser.getResult(); 065 066 } 067 public ElementParser createSubParser(String sub_path) throws Exception { 068 URI uri = null; 069 if(document_base_ != null) 070 uri = document_base_.resolve(sub_path); 071 else { 072 uri = new File(sub_path).toURI(); 073 } 074 ElementParser sub_parser = new ElementParser(uri.toURL().openStream()); 075 sub_parser.setDocumentBase(uri.resolve("..")); 076 sub_parser.setPolicy(policy_); 077 return sub_parser; 078 } 079 080 private void initParsers() { 081 if(policy_==null) { 082 policy_ = new DefaultParserPolicy() { 083 String encoding_; 084 public boolean forceEmptyTag(String key) { 085 return false; 086 } 087 public Element allowElement(Element element) { 088 if(encoding_ == null) { 089 if(element.isTagElement()){ 090 TagElement te = (TagElement)element; 091 if(te.isPI()) 092 encoding_ = te.getAttributeValue("encoding"); 093 } 094 if(encoding_ == null) encoding_ = "utf-8"; 095 } 096 return element; 097 } 098 099 public String selectEncoding(String last_tag_key) { 100 return encoding_; 101 } 102 }; 103 } 104 element_part_parser_ = new ElementPartParser(policy_); 105 } 106 107 public void setPolicy(ParserPolicy policy) { 108 policy_ = policy; 109 initParsers(); 110 } 111 112 private void init(InputStream is) { 113 initParsers(); 114 select_encoding_after_readeing_first_line_ = true; 115 is_ = is; 116 } 117 private void init(Reader reader){ 118 initParsers(); 119 if(reader instanceof InputStreamReader){ 120 encoding_ = ((InputStreamReader)reader).getEncoding(); 121 reader_ = new BufferedReader(reader); 122 }else 123 reader_ = reader; 124 } 125 126 public void error(ParseElement source) { 127 128 System.err.println("error: "); 129 System.err.println(source); 130 try{ 131 for(int i=0;i<1000;i++) { 132 System.err.print((char)get()); 133 } 134 }catch(Exception e) {} 135 } 136 137 138 139 public Element[] parse() throws IOException, XMLParseException { 140 tab_count_ = 0; 141 142 ArrayList<Element> list = new ArrayList<Element>(); 143 144 if (reader_ != null) { 145 data_source_ = this; 146 } 147 else { 148 data_source_ = new ElementParser() { 149 public int get() throws IOException { 150 return is_.read(); 151 } 152 }; 153 } 154 element_part_parser_.error_text_ = new StringBuffer(); 155 156 try { 157 int last = parse(data_source_.get(), list); 158 } 159 catch(XMLParseException e){ 160 if(e.getMessage().equals("$cancel")) 161 System.out.println("parse canelled"); 162 else 163 throw e; 164 } 165 catch(IOException e){ 166 if(e.getMessage().equals("$cancel")) 167 System.out.println("parse canelled"); 168 else 169 throw e; 170 } 171 /*if( last == -1) 172 System.out.println("end of stream.(ok)"); 173 else 174 System.out.println("! Not end of stream !");*/ 175 176 result_ = list.toArray(new Element[]{}); 177 178 //for(int i=0;i<list.size();i++) 179 // System.out.println(list.get(i).toString()); 180 return result_; 181 } 182 183 private int parse(int next, ArrayList<Element> list) throws XMLParseException, IOException { 184 185 Element element = null; 186 187 while(next>=0) { 188 189 next = element_part_parser_.parse(next, data_source_); 190 191 if(element_part_parser_.is_error_ && policy_.throwExceptionIfDocumentHasError()) 192 throw new XMLParseException(element_part_parser_.error_text_.toString()); 193 194 if(element_part_parser_.isTextElement()) { 195 TextElement text_element = element_part_parser_.getTextElement(); 196 element = policy_.allowElement(text_element); 197 198 } else if(element_part_parser_.isTagElement()){ 199 200 TagElement tag_element = element_part_parser_.getTagElement(); 201 //System.out.println(tag_element); 202 //JOptionPane.showMessageDialog(null, tag_element.getKey()); 203 204 if(reader_ == null) { 205 String encoding = policy_.selectEncoding(tag_element.getKey()); 206 if(encoding != null) { 207 //System.out.println("set encoding: "+encoding); 208 encoding_ = encoding; 209 try { 210 reader_ = new BufferedReader(new InputStreamReader(is_, encoding)); 211 } 212 catch(UnsupportedEncodingException exc) { 213 throw new XMLParseException(exc.toString()); 214 } 215 data_source_ = this; 216 } 217 } 218 219 //System.out.println("\nkey: "+tag_element.getKey()); 220 //System.out.println("att: "+tag_element.getAttributes()); 221 222 if(element_part_parser_.isStartTag()){ 223 //JOptionPane.showMessageDialog(null, "start tag:\n"); 224 policy_stack_.add(policy_); 225 policy_ = policy_.getInnerPolicy(tag_element); 226 227 tab_count_++; 228 String start_key = tag_element.getKey(); 229 //System.out.println("start tag: "+start_key); 230 231 ArrayList<Element> children = new ArrayList<Element>(); 232 next = parse(next, children); 233 234 String end_key = element_part_parser_.getEndTagName(); 235 if(policy_.checkEndTag()) 236 if(!start_key.equals(end_key) && policy_.throwExceptionIfDocumentHasError()) { 237 String message = "end tag does not match! (start:"+start_key+" end:"+end_key+")"; 238 if(element_part_parser_.is_error_) 239 element_part_parser_.error_text_.append(message); 240 else 241 throw new XMLParseException(message); 242 } 243 // throw new Exception("parse error: "+end_key+" does not match "+start_key); 244 245 tag_element.setChildren(children.toArray(new Element[]{})); 246 tab_count_--; 247 //System.out.println("end children :"+tag_element.getKey()); 248 if(policy_stack_.size()>0) 249 policy_ = policy_stack_.remove(policy_stack_.size()-1); 250 } 251 252 //JOptionPane.showMessageDialog(null, "add to list:\n"+tag_element.getKey()); 253 tag_element.setDocumentBase(getDocumentBase()); 254 255 element = policy_.allowElement(tag_element); 256 257 //System.out.println("add-----------------------"); 258 } else { // end tag 259 if(!policy_.forceEmptyTag(element_part_parser_.getEndTagName())) 260 return next; 261 element = null; 262 } 263 264 if(element != null) list.add(element); 265 266 next = element_part_parser_.parse(next, data_source_); 267 268 if(element_part_parser_.is_error_ && policy_.throwExceptionIfDocumentHasError()) 269 throw new XMLParseException(element_part_parser_.error_text_.toString()); 270 271 if(element_part_parser_.isTextElement()) { 272 TextElement text_element = element_part_parser_.getTextElement(); 273 element = policy_.allowElement(text_element); 274 275 } else if(element_part_parser_.isTagElement()){ 276 277 TagElement tag_element = element_part_parser_.getTagElement(); 278 //System.out.println(tag_element); 279 //JOptionPane.showMessageDialog(null, tag_element.getKey()); 280 281 if(reader_ == null) { 282 String encoding = policy_.selectEncoding(tag_element.getKey()); 283 if(encoding != null) { 284 //System.out.println("set encoding: "+encoding); 285 encoding_ = encoding; 286 try { 287 reader_ = new BufferedReader(new InputStreamReader(is_, encoding)); 288 } 289 catch(UnsupportedEncodingException exc) { 290 throw new XMLParseException(exc.toString()); 291 } 292 data_source_ = this; 293 } 294 } 295 296 //System.out.println("\nkey: "+tag_element.getKey()); 297 //System.out.println("att: "+tag_element.getAttributes()); 298 299 if(element_part_parser_.isStartTag()){ 300 //JOptionPane.showMessageDialog(null, "start tag:\n"); 301 policy_stack_.add(policy_); 302 policy_ = policy_.getInnerPolicy(tag_element); 303 if(policy_.finished()) 304 throw new XMLParseException("$cancel"); 305 306 tab_count_++; 307 String start_key = tag_element.getKey(); 308 //System.out.println("start tag: "+start_key); 309 310 ArrayList<Element> children = new ArrayList<Element>(); 311 next = parse(next, children); 312 313 String end_key = element_part_parser_.getEndTagName(); 314 if(policy_.checkEndTag()) 315 if(!start_key.equals(end_key) && policy_.throwExceptionIfDocumentHasError()) { 316 String message = "end tag does not match! (start:"+start_key+" end:"+end_key+")"; 317 if(element_part_parser_.is_error_) 318 element_part_parser_.error_text_.append(message); 319 else 320 throw new XMLParseException(message); 321 } 322 // throw new Exception("parse error: "+end_key+" does not match "+start_key); 323 324 tag_element.setChildren(children.toArray(new Element[]{})); 325 tab_count_--; 326 //System.out.println("end children :"+tag_element.getKey()); 327 if(policy_stack_.size()>0) 328 policy_ = policy_stack_.remove(policy_stack_.size()-1); 329 330 if(policy_.finished()) 331 throw new XMLParseException("$cancel"); 332 } 333 334 //JOptionPane.showMessageDialog(null, "add to list:\n"+tag_element.getKey()); 335 tag_element.setDocumentBase(getDocumentBase()); 336 337 element = policy_.allowElement(tag_element); 338 339 //System.out.println("add-----------------------"); 340 } else { // end tag 341 if(!policy_.forceEmptyTag(element_part_parser_.getEndTagName())) 342 return next; 343 element = null; 344 } 345 346 if(element != null) list.add(element); 347 348 if(policy_.finished()) 349 throw new XMLParseException("$cancel"); 350 } 351 return -1; 352 } 353 354 355 public int escape (String message) throws XMLParseException, IOException { 356 int next = -1; 357 try { throw new Exception("mark");}catch(Exception e){e.printStackTrace();} 358 System.err.println("this documents has error: "+message); 359 System.err.println("skip---------------------"); 360 int c = get(); 361 System.err.print((char)c); 362 while(c!='>' && c!=-1) System.err.print((char)(c=get())); 363 // for(int i=0;i<3000;i++) System.err.print((char)(c=parser.get())); 364 System.err.println("\n-------------------------"); 365 return get(); 366 } 367 368 public Element[] getResult() { 369 return result_; 370 } 371 public TagElement getFirstPlainTagElement() { 372 for(Element tmp : result_) 373 if(tmp.isTagElement()) { 374 TagElement tag = (TagElement)tmp; 375 if(!tag.isPI()) 376 return tag; 377 } 378 return null; 379 } 380 381 public String getEncoding() { 382 return encoding_; 383 } 384 385 int counter = 0; 386 long start = System.currentTimeMillis(); 387 public int get() throws IOException { 388 return reader_.read(); 389 390 // int val = reader_.read(); 391 // counter++; 392 //System.out.print("["+(char)val+"]"); 393 // return val; 394 } 395 public char getChar() throws IOException { 396 int b = get(); 397 if(b==-1) throw new IOException("end of stream."); 398 return (char)b; 399 } 400 }