/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */

package org.util.xml.parse;

import java.io.PrintWriter;
import java.io.StringWriter;
import org.util.xml.parse.policy.ParserPolicy;
import org.util.xml.element.Attribute;
import org.util.xml.element.TagElement;
import org.util.xml.element.TextElement;

/**
 * Parses one "part" of an XML-like document: a text run, a comment-like
 * construct, a start/empty tag, a processing instruction ({@code <? ... ?>})
 * or an end tag.
 *
 * <p>After {@link #parse(int, ElementParser)} returns, exactly which of
 * {@link #getTextElement()}, {@link #getTagElement()} and
 * {@link #getEndTagName()} is non-null tells the caller what was read.
 *
 * <p>The policy is optional: the no-argument constructor installs
 * {@code null}, and every use of the policy in this class is null-guarded.
 *
 * @author masaru
 */
public class ElementPartParser extends ParseElement {

    // parsers
    protected NameParser name_parser_;
    protected SpaceParser space_parser_;
    protected AttributeParser attribute_parser_;
    protected TextElementParser text_element_parser_;
    protected CommentInnerTagElementParser comment_inner_tag_element_parser_;

    /** Optional parsing policy; may be {@code null}. */
    protected ParserPolicy policy_;

    /** True when the most recently parsed attribute reported {@code isNovalue()}. */
    protected boolean is_novalue_occuered_;
    /** Set when the last {@code parse} or {@code escape} call hit an error. */
    protected boolean is_error_;

    protected TagElement tag_element_;
    protected TextElement text_element_;
    protected String endtag_name_;
    protected boolean is_start_tag_;
    /** Accumulates stack traces of parse errors when the policy asks for it. */
    protected StringBuffer error_text_;

    /** Creates a parser without a policy. */
    public ElementPartParser() {
        this(null);
    }

    /**
     * Creates a parser.
     *
     * @param policy parsing policy, or {@code null} for none
     */
    public ElementPartParser(ParserPolicy policy) {
        policy_ = policy;

        name_parser_ = new NameParser();
        space_parser_ = new SpaceParser();
        attribute_parser_ = new AttributeParser();
        text_element_parser_ = new TextElementParser();
        comment_inner_tag_element_parser_ = new CommentInnerTagElementParser();
        error_text_ = new StringBuffer();
    }

    /** Returns true when {@code c} is the tag-opening character {@code '<'}. */
    @Override
    public boolean match(char c) {
        return c == '<';
    }

    /**
     * Reads one document part starting at character {@code c}.
     *
     * <p>Implemented as a small state machine over a local {@code state}:
     * <ul>
     *   <li>0 - before a tag: skips whitespace, {@code '<'} goes to 2, a text
     *       run is parsed and ends the call, anything else is skipped via
     *       {@link #escape(ElementParser, String)}</li>
     *   <li>1 - expects {@code '<'} (no transition in this method sets it)</li>
     *   <li>2 - just after {@code '<'}: {@code '/'} goes to 6, {@code '?'} goes
     *       to 9, a comment-like construct is parsed and ends the call, a name
     *       creates the tag element and goes to 10</li>
     *   <li>3 - ends the loop (no transition in this method sets it)</li>
     *   <li>4 - after {@code '/'} or {@code '?'} inside a tag: marks the tag
     *       empty and requires {@code '>'}</li>
     *   <li>6 - reads the end-tag name, then 7</li>
     *   <li>7 - after the end-tag name: optional whitespace, then {@code '>'}</li>
     *   <li>9 - reads the processing-instruction name, then 10</li>
     *   <li>10 - after a name or an attribute</li>
     *   <li>11 - after whitespace inside a tag: attribute or tag terminator</li>
     * </ul>
     *
     * <p>Exceptions raised while parsing are caught here: the error flag is
     * set, the stack trace is appended to {@code error_text_} when a policy is
     * present and its {@code throwExceptionIfDocumentHasError()} returns true,
     * and {@code parser.escape(message)} is invoked.
     *
     * @param c      first character to examine
     * @param parser character source
     * @return the character the text parser stopped on when a text element was
     *         read; the result of {@code escape} on the state-10 error paths;
     *         otherwise the next value of {@code parser.get()}
     * @throws Exception if {@code parser.escape} or {@code parser.get} throws
     */
    @Override
    public int parse(int c, ElementParser parser) throws Exception {

        is_error_ = false;
        is_start_tag_ = false;
        endtag_name_ = null;
        text_element_ = null;
        tag_element_ = null;
        is_novalue_occuered_ = false;

        int state = 0;
        try {

            while (true) {

                if (state == 0) {
                    if (c == '<') {
                        state = 2;
                    } else if (isSpace(c)) {
                        // leading whitespace is skipped
                    } else if (text_element_parser_.match((char) c)) {
                        c = text_element_parser_.parse(c, parser);
                        text_element_ = new TextElement(text_element_parser_.getReturnValue());
                        break;
                    } else {
                        // the value returned by escape is intentionally dropped;
                        // the loop keeps reading in state 0
                        escape(parser, "parse error: cannot read tag");
                    }
                } else if (state == 1) {
                    if (c == '<') {
                        state = 2;
                    } else {
                        throw new Exception("parse error: cannot read tag");
                    }
                } else if (state == 2) {
                    if (c == '/') {
                        state = 6;
                    } else if (c == '?') {
                        state = 9;
                    } else if (comment_inner_tag_element_parser_.match((char) c)) {
                        c = comment_inner_tag_element_parser_.parse(c, parser);
                        text_element_ = comment_inner_tag_element_parser_.getResult();
                        break;
                    } else if (name_parser_.match((char) c)) {
                        c = name_parser_.parse(c, parser);
                        String key = name_parser_.getReturnValue();
                        tag_element_ = new TagElement(key);
                        if (policy_ != null && policy_.forceEmptyTag(key)) {
                            tag_element_.setEmpty(true);
                        } else {
                            is_start_tag_ = true;
                        }
                        state = 10;
                        continue; // c already holds the character after the name
                    } else {
                        throw new Exception("parse error: cannot read tag: this char is not allowed (" + (char) c + ")");
                    }
                } else if (state == 3) {
                    break;
                } else if (state == 4) {
                    tag_element_.setEmpty(true);
                    is_start_tag_ = false;
                    if (c == '>') {
                        break;
                    } else {
                        throw new Exception("parse error: cannot read tag");
                    }
                } else if (state == 6) {
                    c = name_parser_.parse(c, parser);
                    endtag_name_ = name_parser_.getReturnValue();
                    state = 7;
                    continue;
                } else if (state == 7) {
                    if (c == '>') {
                        break;
                    } else if (space_parser_.match((char) c)) {
                        c = space_parser_.parse(c, parser);
                        continue;
                    } else {
                        throw new Exception("parse error: cannot read tag");
                    }
                } else if (state == 9) {
                    c = name_parser_.parse(c, parser);
                    tag_element_ = new TagElement(name_parser_.getReturnValue());
                    tag_element_.setPI(true);
                    state = 10;
                    continue;
                } else if (state == 10) {
                    if (space_parser_.match((char) c)) {
                        c = space_parser_.parse(c, parser);
                        state = 11;
                        continue;
                    } else {
                        if (tag_element_.isPI()) {
                            if (c == '?') {
                                state = 4;
                            } else {
                                return escape(parser, "in <? ... ?> tag");
                            }
                        } else {
                            if (c == '>') {
                                break;
                            } else if (c == '/') {
                                state = 4;
                            } else if (is_novalue_occuered_) {
                                // the previous attribute had no value, so another
                                // attribute is accepted here without whitespace
                                c = attribute_parser_.parse(c, parser);
                                Attribute attribute = attribute_parser_.getAttribute();
                                is_novalue_occuered_ = attribute.isNovalue();
                                tag_element_.addAttribute(attribute);
                                state = 10;
                                continue;
                            } else {
                                return escape(parser, "cannot read " + (char) c);
                            }
                        }
                    }
                } else if (state == 11) {
                    if (attribute_parser_.match((char) c)) {
                        c = attribute_parser_.parse(c, parser);
                        Attribute attribute = attribute_parser_.getAttribute();
                        is_novalue_occuered_ = attribute.isNovalue();
                        tag_element_.addAttribute(attribute);
                        state = 10;
                        continue;
                    } else {
                        if (tag_element_.isPI()) {
                            if (c == '?') {
                                state = 4;
                            } else {
                                throw new Exception("parse error: cannot read tag");
                            }
                        } else {
                            if (c == '>') {
                                break;
                            } else if (c == '/') {
                                state = 4;
                            } else {
                                throw new Exception("parse error: cannot read tag");
                            }
                        }
                    }
                }

                // Advance: outside a tag the source is read with get(), inside a
                // tag with getChar().
                if (state == 0) {
                    c = parser.get();
                } else {
                    c = parser.getChar();
                }
            }

        } catch (Exception e) {
            is_error_ = true;
            StringWriter sw = new StringWriter();
            e.printStackTrace(new PrintWriter(sw));
            // policy_ is null when the no-arg constructor was used; without this
            // guard the handler itself threw NullPointerException, hiding the
            // original error and skipping parser.escape().
            if (policy_ != null && policy_.throwExceptionIfDocumentHasError()) {
                error_text_.append(sw.toString());
            }
            parser.escape(e.getMessage());
        }

        if (text_element_ != null) {
            return c;
        } else {
            return parser.get();
        }
    }

    /**
     * Error recovery: marks the parser as failed, reports {@code message} on
     * {@code System.err}, then reads and echoes characters from
     * {@code parser.get()} until a {@code '>'} or {@code -1} has been read.
     *
     * @param parser  character source
     * @param message description of the problem, printed to {@code System.err}
     * @return the value of {@code parser.get()} read after the skipped span
     * @throws Exception if {@code parser.get()} throws
     */
    public int escape (ElementParser parser,String message) throws Exception {
        // diagnostic trace showing where escape() was called from
        new Exception("mark").printStackTrace();
        is_error_ = true;
        System.err.println("this documents has error: "+message);
        System.err.println("skip---------------------");
        int c = parser.get();
        System.err.print((char) c);
        while (c != '>' && c != -1) {
            c = parser.get();
            System.err.print((char) c);
        }
        System.err.println("\n-------------------------");
        return parser.get();
    }

    /** Returns true when the last parse produced a tag element. */
    public boolean isTagElement() {
        return (tag_element_ != null);
    }

    /** Returns true when the last parse produced a text element. */
    public boolean isTextElement() {
        return (text_element_ != null);
    }

    /** Returns true when the last parsed tag is a start tag (not empty, forced-empty or PI-terminated). */
    public boolean isStartTag() {
        return is_start_tag_;
    }

    /** Returns the text element from the last parse, or {@code null}. */
    public TextElement getTextElement() {
        return text_element_;
    }

    /** Returns the tag element from the last parse, or {@code null}. */
    public TagElement getTagElement() {
        return tag_element_;
    }

    /** Returns the end-tag name from the last parse, or {@code null}. */
    public String getEndTagName() {
        return endtag_name_;
    }
}