001    /*
002     * To change this template, choose Tools | Templates
003     * and open the template in the editor.
004     */
005    
006    package org.util.xml.parse.policy;
007    
008    import org.util.xml.parse.policy.ParserPolicy;
009    import javax.swing.JOptionPane;
010    import org.util.xml.element.Element;
011    import org.util.xml.element.TagElement;
012    
013    /**
014     *
015     * @author masaru
016     */
017    public class HTMLParserPolicy extends DefaultParserPolicy {
018    
019        protected String[] forse_empty_tag_list_ = {"br","hr","meta","link","img","input","base","dd","dt","frame","p","pre","li","space"};
020        protected String encoding_ = null;
021        
022        public boolean checkEndTagMatch() {
023            return false;
024        }
025        public boolean forceEmptyTag(String key) {
026            for(int i=0;i<forse_empty_tag_list_.length;i++)
027                if(forse_empty_tag_list_[i].equals(key.toLowerCase()))
028                    return true;
029            return false;
030        }
031        
032        public Element allowElement(Element element) {
033    //        JOptionPane.showMessageDialog(null, "check\n"+element);
034            if(encoding_ == null && element.isTagElement()) {
035                TagElement telement = (TagElement)element;
036                if(telement.getKey().toLowerCase().equals("meta")) {
037                    if("content-type".equals(telement.getAttributeValue("http-equiv","").toLowerCase())) {
038                        String contenttext = telement.getAttributeValue("content");
039                        String encoding = null;
040                        int point = contenttext.indexOf("charset");
041                        if(point != -1) {
042                            for(int i=point+"charset".length();i<contenttext.length()&&encoding==null;i++) {
043                                char c = contenttext.charAt(i);
044                                if(c!=' ' && c!='=')
045                                    encoding = contenttext.substring(i, contenttext.length());
046                            }
047                            System.out.println("found encoding: "+ encoding);
048                            encoding_ = encoding;
049                        }
050                    }
051                }
052            }
053    //        JOptionPane.showMessageDialog(null, "encoding:"+encoding_);
054            return element;
055        }
056    
057        public String selectEncoding(String last_tag_key) {
058            if(last_tag_key!=null && last_tag_key.toLowerCase().equals("body")) {
059                encoding_ = "JISAutoDetect";
060                encoding_ = JOptionPane.showInputDialog("<html>encoding does not defained before reading body tag<br/>select encoding</html>",encoding_);
061            }
062            return encoding_;
063        }
064    }