001    /*
002     * To change this template, choose Tools | Templates
003     * and open the template in the editor.
004     */
005    
006    package org.util.xml.parse;
007    
008    import java.io.PrintWriter;
009    import java.io.StringWriter;
010    import org.util.xml.parse.policy.ParserPolicy;
011    import org.util.xml.element.Attribute;
012    import org.util.xml.element.TagElement;
013    import org.util.xml.element.TextElement;
014    
015    /**
016     *
017     * @author masaru
018     */
019    public class ElementPartParser extends ParseElement {
020    
021        // parsers
022        protected NameParser name_parser_;
023        protected SpaceParser space_parser_;
024        protected AttributeParser attribute_parser_;
025        protected TextElementParser text_element_parser_;
026        protected CommentInnerTagElementParser comment_inner_tag_element_parser_;
027    
028        protected ParserPolicy policy_;
029        
030        protected boolean is_novalue_occuered_;
031        protected boolean is_error_;
032        
033        protected TagElement tag_element_;
034        protected TextElement text_element_;
035        protected String endtag_name_;
036        protected boolean is_start_tag_;
037        protected StringBuffer error_text_;
038        
039        public ElementPartParser() {
040            this(null);
041        }
042        
043        public ElementPartParser(ParserPolicy policy) {
044            policy_ = policy;
045    
046            name_parser_ = new NameParser();
047            space_parser_ = new SpaceParser();
048            attribute_parser_ = new AttributeParser();
049            text_element_parser_ = new TextElementParser();
050            comment_inner_tag_element_parser_ = new CommentInnerTagElementParser();
051            error_text_ = new StringBuffer();
052        }
053        
054        @Override
055        public boolean match(char c) {
056            return c=='<';
057        }
058    
059        @Override
060        public int parse(int c, ElementParser parser) throws Exception {
061    
062            is_error_ = false;
063            is_start_tag_ = false;
064            endtag_name_ = null;
065            text_element_ = null;
066            tag_element_ = null;
067            is_novalue_occuered_ = false;
068            
069            int next_word_ = -1;
070            int state = 0;
071    try{
072        
073            while(true) {
074                
075                //System.out.println("state:"+state+"|"+(char)c);
076                if(state == 0) {
077                    if(c=='<') state = 2;
078                    else if(isSpace(c)) ;
079                    else if(text_element_parser_.match((char)c)){
080                        c = text_element_parser_.parse(c, parser);
081                        text_element_ = new TextElement(text_element_parser_.getReturnValue());
082                        break;
083                    } else escape(parser, "parse error: cannot read tag");
084                }else if(state == 1) {
085                    if(c=='<') state = 2;
086                    else throw new Exception("parse error: cannot read tag");
087                }else if(state == 2) {
088                    if((c=='/')) state = 6;
089                    else if(c=='?') {
090                        state = 9;
091                    } else if(comment_inner_tag_element_parser_.match((char)c)) {
092                        c = comment_inner_tag_element_parser_.parse(c, parser);
093                        text_element_ = comment_inner_tag_element_parser_.getResult();
094                        break;
095                    } else if(name_parser_.match((char)c)) {
096                        c = name_parser_.parse(c, parser);
097                        String key = name_parser_.getReturnValue();
098                        tag_element_ = new TagElement(key);
099                        if(policy_ != null && policy_.forceEmptyTag(key))
100                            tag_element_.setEmpty(true);
101                        else
102                            is_start_tag_ = true;
103                        state = 10;
104                        continue;
105                    } else throw new Exception("parse error: cannot read tag: this char is not allowed ("+(char)c+")");
106                }else if(state == 3) {
107                    next_word_ = c;
108                    break;
109                } else if(state == 4) {
110                    tag_element_.setEmpty(true);
111                    is_start_tag_ = false;
112                    if(c=='>') break;
113                    else throw new Exception("parse error: cannot read tag");
114                }else if(state == 6) {
115                    c = name_parser_.parse(c, parser);
116                    endtag_name_ = name_parser_.getReturnValue();
117                    state = 7;
118                    continue;
119                }else if(state == 7) {
120                    if(c=='>') {
121                        break;
122                    } else if(space_parser_.match((char)c)) {
123                        c = space_parser_.parse(c, parser);
124                        continue;
125                    } else throw new Exception("parse error: cannot read tag");
126                }else if(state == 9) {
127                    c = name_parser_.parse(c, parser);
128                    tag_element_ = new TagElement(name_parser_.getReturnValue());
129                    tag_element_.setPI(true);
130                    state = 10;
131                    continue;
132                }else if(state == 10) {
133                    if(space_parser_.match((char)c)) {
134                        c = space_parser_.parse(c, parser);
135                        state = 11;
136                        continue;
137                    } else {
138                        if(tag_element_.isPI()){
139                            if(c=='?') state = 4;
140                            else return escape(parser,"in <? ... ?> tag");
141                        } else {
142                            if(c=='>') break;
143                            else if(c=='/') state = 4;
144                            else if(is_novalue_occuered_) {
145                                c = attribute_parser_.parse(c, parser);
146                                Attribute attribute = attribute_parser_.getAttribute();
147                                is_novalue_occuered_ = attribute.isNovalue();
148                                tag_element_.addAttribute(attribute);
149                                state = 10;
150                                continue;
151                            } else return escape(parser,"cannot read "+(char)c);
152                        }
153                    }
154                }else if(state == 11) {
155                    if(attribute_parser_.match((char)c)) {
156                        c = attribute_parser_.parse(c, parser);
157                        Attribute attribute = attribute_parser_.getAttribute();
158                        is_novalue_occuered_ = attribute.isNovalue();
159                        tag_element_.addAttribute(attribute);
160                        state = 10;
161                        continue;
162                    } else {
163                        if(tag_element_.isPI()){
164                            if(c=='?') state = 4;
165                            else throw new Exception("parse error: cannot read tag");
166                        } else {
167                            if(c=='>') break;
168                            else if(c=='/') state = 4;
169                            else throw new Exception("parse error: cannot read tag");
170                        }
171                    }
172                }
173                if(state==0)
174                    c = parser.get();
175                else
176                    c = parser.getChar();
177            }
178            
179    } catch(Exception e) {
180        is_error_ = true;
181        StringWriter sw = new StringWriter();
182        e.printStackTrace(new PrintWriter(sw));
183        if(policy_.throwExceptionIfDocumentHasError())
184            error_text_.append(sw.toString());
185        parser.escape(e.getMessage());
186    }
187            
188            if(text_element_!=null)
189                return c;
190            else
191                return parser.get();
192        }
193        
194        public int escape (ElementParser parser,String message) throws Exception {
195            try{throw new Exception("mark");}catch(Exception e){e.printStackTrace();}
196            is_error_ = true;
197            System.err.println("this documents has error: "+message);
198            System.err.println("skip---------------------");
199            int c = parser.get();
200            System.err.print((char)c);
201            while(c!='>' && c!=-1) System.err.print((char)(c=parser.get()));
202    //        for(int i=0;i<3000;i++) System.err.print((char)(c=parser.get()));
203            System.err.println("\n-------------------------");
204            return parser.get();
205        }
206        public boolean isTagElement() {
207            return (tag_element_!=null);
208        }
209        public boolean isTextElement() {
210            return (text_element_!=null);
211        }
212        public boolean isStartTag() {
213            return is_start_tag_;
214        }
215        public TextElement getTextElement() {
216            return text_element_;
217        }
218        public TagElement getTagElement() {
219            return tag_element_;
220        }
221        public String getEndTagName() {
222            return endtag_name_;
223        }
224    }