001 /* 002 * To change this template, choose Tools | Templates 003 * and open the template in the editor. 004 */ 005 006 package org.util.xml.parse; 007 008 import org.util.xml.parse.policy.ParserPolicy; 009 import java.io.BufferedReader; 010 import java.io.File; 011 import java.io.InputStream; 012 import java.io.InputStreamReader; 013 import java.io.Reader; 014 import java.net.URI; 015 import java.util.ArrayList; 016 import javax.swing.JOptionPane; 017 import org.util.xml.element.Attributes; 018 import org.util.xml.element.Element; 019 import org.util.xml.element.TagElement; 020 import org.util.xml.element.TextElement; 021 import org.util.xml.parse.policy.*; 022 023 /** 024 * 025 * @author masaru 026 */ 027 public class ElementParser { 028 029 private Reader reader_; 030 protected static ElementPartParser element_part_parser_; 031 private Element[] result_; 032 private ParserPolicy policy_; 033 private int tab_count_; 034 private String encoding_ = "unknown"; 035 private boolean select_encoding_after_readeing_first_line_; 036 private InputStream is_; 037 private ElementParser data_source_; 038 private URI document_base_; 039 private ArrayList<ParserPolicy> policy_stack_ = new ArrayList<ParserPolicy>(); 040 041 protected ElementParser(){} 042 public ElementParser(Reader reader) throws Exception { 043 init(reader); 044 } 045 public ElementParser(InputStream is) throws Exception { 046 init(is); 047 } 048 public ElementParser(InputStream is, String encoding) throws Exception { 049 init(new InputStreamReader(is,encoding)); 050 } 051 public void setDocumentBase(URI document_base) { 052 document_base_ = document_base; 053 } 054 public URI getDocumentBase() { 055 return document_base_; 056 } 057 public Element[] createSubElements(String sub_path) throws Exception { 058 ElementParser sub_parser = createSubParser(sub_path); 059 sub_parser.parse(); 060 return sub_parser.getResult(); 061 062 } 063 public ElementParser createSubParser(String sub_path) throws Exception { 064 URI uri = null; 065 if(document_base_ != null) 066 uri = document_base_.resolve(sub_path); 067 else { 068 uri = new File(sub_path).toURI(); 069 } 070 ElementParser sub_parser = new ElementParser(uri.toURL().openStream()); 071 sub_parser.setDocumentBase(uri.resolve("..")); 072 sub_parser.setPolicy(policy_); 073 return sub_parser; 074 } 075 076 private void initParsers() { 077 if(policy_==null) { 078 policy_ = new DefaultParserPolicy() { 079 String encoding_; 080 public boolean forceEmptyTag(String key) { 081 return false; 082 } 083 public Element allowElement(Element element) { 084 if(encoding_ == null) { 085 if(element.isTagElement()){ 086 TagElement te = (TagElement)element; 087 if(te.isPI()) 088 encoding_ = te.getAttributeValue("encoding"); 089 } 090 if(encoding_ == null) encoding_ = "utf-8"; 091 } 092 return element; 093 } 094 095 public String selectEncoding(String last_tag_key) { 096 return encoding_; 097 } 098 }; 099 } 100 element_part_parser_ = new ElementPartParser(policy_); 101 } 102 103 public void setPolicy(ParserPolicy policy) { 104 policy_ = policy; 105 initParsers(); 106 } 107 108 private void init(InputStream is) throws Exception { 109 initParsers(); 110 select_encoding_after_readeing_first_line_ = true; 111 is_ = is; 112 } 113 private void init(Reader reader){ 114 initParsers(); 115 if(reader instanceof InputStreamReader){ 116 encoding_ = ((InputStreamReader)reader).getEncoding(); 117 reader_ = new BufferedReader(reader); 118 }else 119 reader_ = reader; 120 } 121 122 public void error(ParseElement source) { 123 124 System.err.println("error: "); 125 System.err.println(source); 126 try{ 127 for(int i=0;i<1000;i++) { 128 System.err.print((char)get()); 129 } 130 }catch(Exception e) {} 131 } 132 133 public Element[] parse() throws Exception{ 134 tab_count_ = 0; 135 136 ArrayList<Element> list = new ArrayList<Element>(); 137 138 if(reader_ != null) 139 data_source_ = this; 140 else { 141 data_source_ = new ElementParser() { 142 public int get() throws Exception { return is_.read(); } 143 }; 144 } 145 element_part_parser_.error_text_ = new StringBuffer(); 146 147 int last = parse(data_source_.get(), list); 148 149 /*if( last == -1) 150 System.out.println("end of stream.(ok)"); 151 else 152 System.out.println("! Not end of stream !");*/ 153 154 result_ = list.toArray(new Element[]{}); 155 156 //for(int i=0;i<list.size();i++) 157 // System.out.println(list.get(i).toString()); 158 return result_; 159 } 160 161 private int parse(int next, ArrayList<Element> list) throws Exception { 162 163 Element element = null; 164 165 while(next!=-1) { 166 167 next = element_part_parser_.parse(next, data_source_); 168 169 if(element_part_parser_.is_error_ && policy_.throwExceptionIfDocumentHasError()) 170 throw new Exception(element_part_parser_.error_text_.toString()); 171 172 if(element_part_parser_.isTextElement()) { 173 TextElement text_element = element_part_parser_.getTextElement(); 174 element = policy_.allowElement(text_element); 175 176 } else if(element_part_parser_.isTagElement()){ 177 178 TagElement tag_element = element_part_parser_.getTagElement(); 179 //System.out.println(tag_element); 180 //JOptionPane.showMessageDialog(null, tag_element.getKey()); 181 182 if(reader_ == null) { 183 String encoding = policy_.selectEncoding(tag_element.getKey()); 184 if(encoding != null) { 185 //System.out.println("set encoding: "+encoding); 186 encoding_ = encoding; 187 reader_ = new BufferedReader(new InputStreamReader(is_, encoding)); 188 data_source_ = this; 189 } 190 } 191 192 //System.out.println("\nkey: "+tag_element.getKey()); 193 //System.out.println("att: "+tag_element.getAttributes()); 194 195 if(element_part_parser_.isStartTag()){ 196 //JOptionPane.showMessageDialog(null, "start tag:\n"); 197 policy_stack_.add(policy_); 198 policy_ = policy_.getInnerPolicy(tag_element); 199 200 tab_count_++; 201 String start_key = tag_element.getKey(); 202 //System.out.println("start tag: "+start_key); 203 204 ArrayList<Element> children = new ArrayList<Element>(); 205 next = parse(next, children); 206 207 String end_key = element_part_parser_.getEndTagName(); 208 if(policy_.checkEndTag()) 209 if(!start_key.equals(end_key) && policy_.throwExceptionIfDocumentHasError()) { 210 String message = "end tag does not match! (start:"+start_key+" end:"+end_key+")"; 211 if(element_part_parser_.is_error_) 212 element_part_parser_.error_text_.append(message); 213 else 214 throw new Exception(message); 215 } 216 // throw new Exception("parse error: "+end_key+" does not match "+start_key); 217 218 tag_element.setChildren(children.toArray(new Element[]{})); 219 tab_count_--; 220 //System.out.println("end children :"+tag_element.getKey()); 221 if(policy_stack_.size()>0) 222 policy_ = policy_stack_.remove(policy_stack_.size()-1); 223 } 224 225 //JOptionPane.showMessageDialog(null, "add to list:\n"+tag_element.getKey()); 226 tag_element.setDocumentBase(getDocumentBase()); 227 228 element = policy_.allowElement(tag_element); 229 230 //System.out.println("add-----------------------"); 231 } else { // end tag 232 if(!policy_.forceEmptyTag(element_part_parser_.getEndTagName())) 233 return next; 234 element = null; 235 } 236 237 if(element != null) list.add(element); 238 239 } 240 return -1; 241 242 } 243 244 public int escape (String message) throws Exception { 245 try{throw new Exception("mark");}catch(Exception e){e.printStackTrace();} 246 System.err.println("this documents has error: "+message); 247 System.err.println("skip---------------------"); 248 int c = get(); 249 System.err.print((char)c); 250 while(c!='>' && c!=-1) System.err.print((char)(c=get())); 251 // for(int i=0;i<3000;i++) System.err.print((char)(c=parser.get())); 252 System.err.println("\n-------------------------"); 253 return get(); 254 } 255 256 public Element[] getResult() { 257 return result_; 258 } 259 public TagElement getFirstPlainTagElement() { 260 for(Element tmp : result_) 261 if(tmp.isTagElement()) { 262 TagElement tag = (TagElement)tmp; 263 if(!tag.isPI()) 264 return tag; 265 } 266 return null; 267 } 268 269 public String getEncoding() { 270 return encoding_; 271 } 272 273 int counter = 0; 274 long start = System.currentTimeMillis(); 275 public int get() throws Exception { 276 return reader_.read(); 277 278 // int val = reader_.read(); 279 // counter++; 280 //System.out.print("["+(char)val+"]"); 281 // return val; 282 } 283 public char getChar() throws Exception { 284 int b = get(); 285 if(b==-1) throw new Exception("end of stream."); 286 return (char)b; 287 } 288 }