001    /*
002     * To change this template, choose Tools | Templates
003     * and open the template in the editor.
004     */
005    
006    package org.util.xml.parse;
007    
008    import org.util.xml.parse.policy.ParserPolicy;
009    import java.io.BufferedReader;
010    import java.io.File;
011    import java.io.InputStream;
012    import java.io.InputStreamReader;
013    import java.io.Reader;
014    import java.net.URI;
015    import java.util.ArrayList;
016    import javax.swing.JOptionPane;
017    import org.util.xml.element.Attributes;
018    import org.util.xml.element.Element;
019    import org.util.xml.element.TagElement;
020    import org.util.xml.element.TextElement;
021    import org.util.xml.parse.policy.*;
022    
023    /**
024     *
025     * @author masaru
026     */
027    public class ElementParser {
028    
029        private Reader reader_;
030        protected static ElementPartParser element_part_parser_;
031        private Element[] result_;
032        private ParserPolicy policy_;
033        private int tab_count_;
034        private String encoding_ = "unknown";
035        private boolean select_encoding_after_readeing_first_line_;
036        private InputStream is_;
037        private ElementParser data_source_;
038        private URI document_base_;
039        private ArrayList<ParserPolicy> policy_stack_ = new ArrayList<ParserPolicy>();
040        
041        protected ElementParser(){}
042        public ElementParser(Reader reader) throws Exception {
043            init(reader);
044        }
045        public ElementParser(InputStream is) throws Exception {
046            init(is);
047        }
048        public ElementParser(InputStream is, String encoding) throws Exception {
049            init(new InputStreamReader(is,encoding));
050        }
051        public void setDocumentBase(URI document_base) {
052            document_base_ = document_base;
053        }
054        public URI getDocumentBase() {
055            return document_base_;
056        }
057        public Element[] createSubElements(String sub_path) throws Exception {
058            ElementParser sub_parser = createSubParser(sub_path);
059            sub_parser.parse();
060            return sub_parser.getResult();
061            
062        }
063        public ElementParser createSubParser(String sub_path) throws Exception {
064            URI uri = null;
065            if(document_base_ != null)
066                uri = document_base_.resolve(sub_path);
067            else {
068                uri = new File(sub_path).toURI();
069            }
070            ElementParser sub_parser = new ElementParser(uri.toURL().openStream());
071            sub_parser.setDocumentBase(uri.resolve(".."));
072            sub_parser.setPolicy(policy_);
073            return sub_parser;
074        }
075        
076        private void initParsers() {
077            if(policy_==null) {
078                policy_ = new DefaultParserPolicy() {
079                    String encoding_;
080                    public boolean forceEmptyTag(String key) {
081                        return false;
082                       }
083                    public Element allowElement(Element element) {
084                        if(encoding_ == null) {
085                            if(element.isTagElement()){
086                                TagElement te = (TagElement)element;
087                                if(te.isPI())
088                                    encoding_ = te.getAttributeValue("encoding");
089                            }
090                            if(encoding_ == null) encoding_ = "utf-8";
091                        }
092                        return element;
093                    }
094    
095                    public String selectEncoding(String last_tag_key) {
096                        return encoding_;
097                    }
098                };
099            }
100            element_part_parser_ = new ElementPartParser(policy_);
101        }
102        
103        public void setPolicy(ParserPolicy policy) {
104            policy_ = policy;
105            initParsers();
106        }
107    
108        private void init(InputStream is) throws Exception {
109            initParsers();
110            select_encoding_after_readeing_first_line_ = true;
111            is_ = is;
112        }
113        private void init(Reader reader){
114            initParsers();
115            if(reader instanceof InputStreamReader){
116                encoding_ = ((InputStreamReader)reader).getEncoding();
117                reader_ = new BufferedReader(reader);
118            }else
119                reader_ = reader;
120        }
121        
122        public void error(ParseElement source) {
123            
124            System.err.println("error: ");
125            System.err.println(source);
126            try{
127                for(int i=0;i<1000;i++) {
128                    System.err.print((char)get());
129                }
130            }catch(Exception e) {}
131        }
132        
133        public Element[] parse() throws Exception{
134            tab_count_ = 0;
135    
136            ArrayList<Element> list = new ArrayList<Element>();
137            
138            if(reader_ != null)
139                data_source_ = this;
140            else {
141                data_source_ = new ElementParser() {
142                    public int get() throws Exception { return is_.read(); }
143                };
144            }
145            element_part_parser_.error_text_ = new StringBuffer();
146            
147            int last = parse(data_source_.get(), list);
148    
149            /*if( last == -1)
150            System.out.println("end of stream.(ok)");
151            else
152            System.out.println("! Not end of stream !");*/
153            
154            result_ = list.toArray(new Element[]{});
155            
156            //for(int i=0;i<list.size();i++)
157              //  System.out.println(list.get(i).toString());
158            return result_;
159        }
160        
161        private int parse(int next, ArrayList<Element> list) throws Exception {
162    
163            Element element = null;
164            
165            while(next!=-1) {
166                
167                next = element_part_parser_.parse(next, data_source_);
168    
169                if(element_part_parser_.is_error_ && policy_.throwExceptionIfDocumentHasError())
170                    throw new Exception(element_part_parser_.error_text_.toString());
171                
172                if(element_part_parser_.isTextElement()) {
173                    TextElement text_element = element_part_parser_.getTextElement();
174                    element = policy_.allowElement(text_element);
175    
176                } else if(element_part_parser_.isTagElement()){
177                    
178                    TagElement tag_element = element_part_parser_.getTagElement();
179    //System.out.println(tag_element);
180    //JOptionPane.showMessageDialog(null, tag_element.getKey());
181                    
182                    if(reader_ == null) {
183                        String encoding = policy_.selectEncoding(tag_element.getKey());
184                        if(encoding != null) {
185    //System.out.println("set encoding: "+encoding);
186                            encoding_ = encoding;
187                            reader_ = new BufferedReader(new InputStreamReader(is_, encoding));
188                            data_source_ = this;
189                        }
190                    }
191                    
192    //System.out.println("\nkey: "+tag_element.getKey());
193    //System.out.println("att: "+tag_element.getAttributes());
194    
195                    if(element_part_parser_.isStartTag()){
196    //JOptionPane.showMessageDialog(null, "start tag:\n");
197                                                    policy_stack_.add(policy_);
198                        policy_ = policy_.getInnerPolicy(tag_element);
199    
200                        tab_count_++;
201                        String start_key = tag_element.getKey();
202    //System.out.println("start tag: "+start_key);
203                        
204                        ArrayList<Element> children = new ArrayList<Element>();
205                        next = parse(next, children);
206    
207                        String end_key = element_part_parser_.getEndTagName();
208                        if(policy_.checkEndTag())
209                            if(!start_key.equals(end_key) && policy_.throwExceptionIfDocumentHasError()) {
210                                String message = "end tag does not match! (start:"+start_key+" end:"+end_key+")";
211                                if(element_part_parser_.is_error_)
212                                    element_part_parser_.error_text_.append(message);
213                                else
214                                    throw new Exception(message);
215                            }
216    //                        throw new Exception("parse error: "+end_key+" does not match "+start_key);
217                        
218                        tag_element.setChildren(children.toArray(new Element[]{}));
219                        tab_count_--;
220    //System.out.println("end children :"+tag_element.getKey());
221                                            if(policy_stack_.size()>0)
222                                                    policy_ = policy_stack_.remove(policy_stack_.size()-1);
223                    }
224                    
225    //JOptionPane.showMessageDialog(null, "add to list:\n"+tag_element.getKey());
226                                            tag_element.setDocumentBase(getDocumentBase());
227    
228                    element = policy_.allowElement(tag_element);
229    
230                    //System.out.println("add-----------------------");
231                } else { // end tag
232                    if(!policy_.forceEmptyTag(element_part_parser_.getEndTagName()))
233                        return next;
234                    element = null;
235                }
236                
237                if(element != null) list.add(element);
238    
239            }
240            return -1;
241            
242        }
243        
244        public int escape (String message) throws Exception {
245            try{throw new Exception("mark");}catch(Exception e){e.printStackTrace();}
246            System.err.println("this documents has error: "+message);
247            System.err.println("skip---------------------");
248            int c = get();
249            System.err.print((char)c);
250            while(c!='>' && c!=-1) System.err.print((char)(c=get()));
251    //        for(int i=0;i<3000;i++) System.err.print((char)(c=parser.get()));
252            System.err.println("\n-------------------------");
253            return get();
254        }
255        
256        public Element[] getResult() {
257            return result_;
258        }
259        public TagElement getFirstPlainTagElement() {
260            for(Element tmp : result_)
261                if(tmp.isTagElement()) {
262                    TagElement tag = (TagElement)tmp;
263                    if(!tag.isPI())
264                        return tag;
265                }
266            return null;
267        }
268        
269        public String getEncoding() {
270            return encoding_;
271        }
272        
273        int counter = 0;
274        long start = System.currentTimeMillis();
275        public int get() throws Exception {
276            return reader_.read();
277    
278    //        int val = reader_.read();
279    //        counter++;
280    //System.out.print("["+(char)val+"]");
281    //        return val;
282        }
283        public char getChar() throws Exception {
284            int b = get();
285            if(b==-1) throw new Exception("end of stream.");
286            return (char)b;
287        }
288    }