ofx_preproc.cpp

Go to the documentation of this file.
00001 /***************************************************************************
00002           ofx_preproc.cpp 
00003                              -------------------
00004     copyright            : (C) 2002 by Benoit Grégoire
00005     email                : bock@step.polymtl.ca
00006 ***************************************************************************/
00012 /***************************************************************************
00013  *                                                                         *
00014  *   This program is free software; you can redistribute it and/or modify  *
00015  *   it under the terms of the GNU General Public License as published by  *
00016  *   the Free Software Foundation; either version 2 of the License, or     *
00017  *   (at your option) any later version.                                   *
00018  *                                                                         *
00019  ***************************************************************************/
00020 #include <iostream>
00021 #include <fstream>
00022 #include <stdlib.h>
00023 #include <stdio.h>
00024 #include <string>
00025 #include "ParserEventGeneratorKit.h"
00026 #include "libofx.h"
00027 #include "messages.hh"
00028 #include "ofx_sgml.hh"
00029 #include "ofc_sgml.hh"
00030 #include "ofx_preproc.hh"
00031 
00032 using namespace std;
// Number of entries in DTD_SEARCH_PATH. It has to be kept in step with the
// initializer list below: the list gains one leading entry when the build
// defines MAKEFILE_DTD_PATH.
00036 #ifdef MAKEFILE_DTD_PATH
00037 const int DTD_SEARCH_PATH_NUM = 4;
00038 #else
00039 const int DTD_SEARCH_PATH_NUM = 3;
00040 #endif
00041  
// Directory prefixes that find_dtd() tries in order (index 0 first). The DTD
// file name is appended directly to each prefix, which is why every literal
// below ends with '/'.
// NOTE(review): find_dtd() hands the concatenated string to ifstream::open()
// unchanged, and the stream does not expand "~" - confirm that the last entry
// really resolves to the user's home directory as presumably intended.
00045 const char *DTD_SEARCH_PATH[DTD_SEARCH_PATH_NUM] = { 
00046 #ifdef MAKEFILE_DTD_PATH
00047   MAKEFILE_DTD_PATH , 
00048 #endif
00049   "/usr/local/share/libofx/dtd/", 
00050   "/usr/share/libofx/dtd/", 
00051   "~/"};
// Size, in chars, of fixed-size buffers in this file, e.g. the read buffer
// that ofx_proc_file() fills with getline().
00052 const unsigned int READ_BUFFER_SIZE = 1024;
00053 
00058 CFCT int ofx_proc_file(LibofxContextPtr ctx, const char * p_filename)
00059   {
00060   LibofxContext *libofx_context;
00061   bool ofx_start=false;
00062   bool ofx_end=false;
00063 
00064   ifstream input_file;
00065   ofstream tmp_file;
00066   char buffer[READ_BUFFER_SIZE];
00067   string s_buffer;
00068   char *filenames[3];
00069   char tmp_filename[50];
00070 
00071   libofx_context=(LibofxContext*)ctx;
00072 
00073   if(p_filename!=NULL&&strcmp(p_filename,"")!=0)
00074     {
00075     message_out(DEBUG, string("ofx_proc_file():Opening file: ")+ p_filename);
00076     
00077     input_file.open(p_filename);
00078     strncpy(tmp_filename,"/tmp/libofxtmpXXXXXX",50);
00079     mkstemp(tmp_filename);
00080     tmp_file.open(tmp_filename);
00081 
00082     message_out(DEBUG,"ofx_proc_file(): Creating temp file: "+string(tmp_filename));
00083     if(!input_file){
00084       message_out(ERROR,"ofx_proc_file():Unable to open the input file "+string(p_filename));
00085     }
00086     else if(!tmp_file){
00087       message_out(ERROR,"ofx_proc_file():Unable to open the output file "+string(tmp_filename));
00088     }
00089     else
00090       {
00091 
00092         do {
00093           input_file.getline(buffer, sizeof(buffer),'\n');
00094           //cout<<buffer<<"\n";
00095           s_buffer.assign(buffer);
00096           //cout<<"input_file.gcount(): "<<input_file.gcount()<<" sizeof(buffer): "<<sizeof(buffer)<<endl;
00097           if(input_file.gcount()<(sizeof(buffer)-1))
00098             {
00099               s_buffer.append("\n");
00100             }
00101           else if( !input_file.eof()&&input_file.fail())
00102             {
00103               input_file.clear();
00104             }
00105           int ofx_start_idx;
00106           if (ofx_start==false &&
00107               (
00108                (libofx_context->currentFileType()==OFX&&
00109                 ((ofx_start_idx=s_buffer.find("<OFX>"))!=
00110                  string::npos||(ofx_start_idx=s_buffer.find("<ofx>"))!=string::npos))
00111                || (libofx_context->currentFileType()==OFC&&
00112                    ((ofx_start_idx=s_buffer.find("<OFC>"))!=string::npos||
00113                     (ofx_start_idx=s_buffer.find("<ofc>"))!=string::npos))
00114               )
00115              )
00116             {
00117               ofx_start=true;
00118               s_buffer.erase(0,ofx_start_idx);//Fix for really broken files that don't have a newline after the header.
00119               message_out(DEBUG,"ofx_proc_file():<OFX> or <OFC> has been found");
00120             }
00121 
00122           if(ofx_start==true && ofx_end==false){
00123             s_buffer=sanitize_proprietary_tags(s_buffer);
00124             //cout<< s_buffer<<"\n";
00125             tmp_file.write(s_buffer.c_str(), s_buffer.length());
00126           }
00127           
00128           if (ofx_start==true &&
00129               (
00130                (libofx_context->currentFileType()==OFX &&
00131                 ((ofx_start_idx=s_buffer.find("</OFX>"))!=string::npos ||
00132                  (ofx_start_idx=s_buffer.find("</ofx>"))!=string::npos))
00133                || (libofx_context->currentFileType()==OFC &&
00134                    ((ofx_start_idx=s_buffer.find("</OFC>"))!=string::npos ||
00135                     (ofx_start_idx=s_buffer.find("</ofc>"))!=string::npos))
00136               )
00137              )
00138             {
00139               ofx_end=true;
00140               message_out(DEBUG,"ofx_proc_file():</OFX> or </OFC>  has been found");
00141             }
00142 
00143         } while(!input_file.eof()&&!input_file.bad());
00144       }
00145     input_file.close();
00146     tmp_file.close();
00147 
00148     char filename_openspdtd[255];
00149     char filename_dtd[255];
00150     char filename_ofx[255];
00151     strncpy(filename_openspdtd,find_dtd(OPENSPDCL_FILENAME).c_str(),255);//The opensp sgml dtd file
00152     if(libofx_context->currentFileType()==OFX)
00153       {
00154         strncpy(filename_dtd,find_dtd(OFX160DTD_FILENAME).c_str(),255);//The ofx dtd file
00155       }
00156     else if(libofx_context->currentFileType()==OFC)
00157       {
00158         strncpy(filename_dtd,find_dtd(OFCDTD_FILENAME).c_str(),255);//The ofc dtd file
00159       }
00160     else
00161       {
00162         message_out(ERROR,string("ofx_proc_file(): Error unknown file format for the OFX parser"));
00163       }
00164 
00165     if((string)filename_dtd!="" && (string)filename_openspdtd!="")
00166       {
00167         strncpy(filename_ofx,tmp_filename,255);//The processed ofx file
00168         filenames[0]=filename_openspdtd;
00169         filenames[1]=filename_dtd;
00170         filenames[2]=filename_ofx;
00171         if(libofx_context->currentFileType()==OFX)
00172           {
00173             ofx_proc_sgml(libofx_context, 3,filenames);
00174           }
00175         else if(libofx_context->currentFileType()==OFC)
00176           {
00177             ofc_proc_sgml(libofx_context, 3,filenames);
00178           }
00179         else
00180           {
00181             message_out(ERROR,string("ofx_proc_file(): Error unknown file format for the OFX parser"));
00182           }
00183         if(remove(tmp_filename)!=0)
00184           {
00185             message_out(ERROR,"ofx_proc_file(): Error deleting temporary file "+string(tmp_filename));
00186           }
00187       }
00188     else
00189       {
00190         message_out(ERROR,"ofx_proc_file(): FATAL: Missing DTD, aborting");
00191       }
00192   }
00193   else{
00194     message_out(ERROR,"ofx_proc_file():No input file specified");
00195   }
00196   return 0;
00197 }
00198 
00199 
00200 
00201 CFCT int libofx_proc_buffer(LibofxContextPtr ctx,
00202                             const char *s, unsigned int size){
00203   ofstream tmp_file;
00204   string s_buffer;
00205   char *filenames[3];
00206   char tmp_filename[50];
00207   int pos;
00208   LibofxContext *libofx_context;
00209 
00210   libofx_context=(LibofxContext*)ctx;
00211 
00212   if (size==0) {
00213     message_out(ERROR,
00214                 "ofx_proc_file(): bad size");
00215     return -1;
00216   }
00217   s_buffer=string(s, size);
00218 
00219   strncpy(tmp_filename,"/tmp/libofxtmpXXXXXX",50);
00220   mkstemp(tmp_filename);
00221   tmp_file.open(tmp_filename);
00222 
00223   message_out(DEBUG,"ofx_proc_file(): Creating temp file: "+string(tmp_filename));
00224   if(!tmp_file){
00225     message_out(ERROR,"ofx_proc_file():Unable to open the output file "+string(tmp_filename));
00226     return -1;
00227   }
00228 
00229   if (libofx_context->currentFileType()==OFX) {
00230     pos=s_buffer.find("<OFX>");
00231     if (pos==string::npos)
00232       pos=s_buffer.find("<ofx>");
00233   }
00234   else if (libofx_context->currentFileType()==OFC) {
00235     pos=s_buffer.find("<OFC>");
00236     if (pos==string::npos)
00237       pos=s_buffer.find("<ofc>");
00238   }
00239   else {
00240     message_out(ERROR,"ofx_proc(): unknown file type");
00241     return -1;
00242   }
00243   if (pos==string::npos || pos > s_buffer.size()) {
00244     message_out(ERROR,"ofx_proc():<OFX> has not been found");
00245     return -1;
00246   }
00247   else {
00248     // erase everything before the OFX tag
00249     s_buffer.erase(0, pos);
00250     message_out(DEBUG,"ofx_proc_file():<OF?> has been found");
00251   }
00252 
00253   if (libofx_context->currentFileType()==OFX) {
00254     pos=s_buffer.find("</OFX>");
00255     if (pos==string::npos)
00256       pos=s_buffer.find("</ofx>");
00257   }
00258   else if (libofx_context->currentFileType()==OFC) {
00259     pos=s_buffer.find("</OFC>");
00260     if (pos==string::npos)
00261       pos=s_buffer.find("</ofc>");
00262   }
00263   else {
00264     message_out(ERROR,"ofx_proc(): unknown file type");
00265     return -1;
00266   }
00267 
00268   if (pos==string::npos || pos > s_buffer.size()) {
00269     message_out(ERROR,"ofx_proc():</OF?> has not been found");
00270     return -1;
00271   }
00272   else {
00273     // erase everything after the /OFX tag
00274     if (s_buffer.size() > pos+6)
00275       s_buffer.erase(pos+6);
00276     message_out(DEBUG,"ofx_proc_file():<OFX> has been found");
00277   }
00278 
00279   s_buffer=sanitize_proprietary_tags(s_buffer);
00280   tmp_file.write(s_buffer.c_str(), s_buffer.length());
00281 
00282   tmp_file.close();
00283 
00284   char filename_openspdtd[255];
00285   char filename_dtd[255];
00286   char filename_ofx[255];
00287   strncpy(filename_openspdtd,find_dtd(OPENSPDCL_FILENAME).c_str(),255);//The opensp sgml dtd file
00288   if(libofx_context->currentFileType()==OFX){
00289     strncpy(filename_dtd,find_dtd(OFX160DTD_FILENAME).c_str(),255);//The ofx dtd file
00290   }
00291   else if(libofx_context->currentFileType()==OFC){
00292     strncpy(filename_dtd,find_dtd(OFCDTD_FILENAME).c_str(),255);//The ofc dtd file
00293   }
00294   else {
00295     message_out(ERROR,string("ofx_proc_file(): Error unknown file format for the OFX parser"));
00296   }
00297 
00298   if((string)filename_dtd!="" && (string)filename_openspdtd!=""){
00299     strncpy(filename_ofx,tmp_filename,255);//The processed ofx file
00300     filenames[0]=filename_openspdtd;
00301     filenames[1]=filename_dtd;
00302     filenames[2]=filename_ofx;
00303     if(libofx_context->currentFileType()==OFX){
00304       ofx_proc_sgml(libofx_context, 3,filenames);
00305     }
00306     else if(libofx_context->currentFileType()==OFC){
00307       ofc_proc_sgml(libofx_context, 3,filenames);
00308     }
00309     else {
00310       message_out(ERROR,string("ofx_proc_file(): Error unknown file format for the OFX parser"));
00311     }
00312     if(remove(tmp_filename)!=0){
00313       message_out(ERROR,"ofx_proc_file(): Error deleting temporary file "+string(tmp_filename));
00314     }
00315   }
00316   else {
00317     message_out(ERROR,"ofx_proc_file(): FATAL: Missing DTD, aborting");
00318   }
00319 
00320   return 0;
00321 }
00322 
00323 
00324 
00325 
00326 
00327 
00332 string sanitize_proprietary_tags(string input_string)
00333 {
00334   unsigned int i;
00335   size_t input_string_size;
00336   bool strip=false;
00337   bool tag_open=false;
00338   int tag_open_idx=0;//Are we within < > ?
00339   bool closing_tag_open=false;//Are we within </ > ?
00340   int orig_tag_open_idx=0;
00341   bool proprietary_tag=false; //Are we within a proprietary element?
00342   bool proprietary_closing_tag=false;
00343   int crop_end_idx=0;
00344   char buffer[READ_BUFFER_SIZE]="";
00345   char tagname[READ_BUFFER_SIZE]="";
00346   int tagname_idx=0;
00347   char close_tagname[READ_BUFFER_SIZE]="";
00348  
00349   for(i=0;i<READ_BUFFER_SIZE;i++){
00350     buffer[i]=0;
00351     tagname[i]=0;
00352     close_tagname[i]=0;
00353   }
00354   
00355   input_string_size=input_string.size();
00356   
00357   for(i=0;i<=input_string_size;i++){
00358     if(input_string.c_str()[i]=='<'){
00359       tag_open=true;
00360       tag_open_idx=i;
00361       if(proprietary_tag==true&&input_string.c_str()[i+1]=='/'){
00362         //We are now in a closing tag
00363         closing_tag_open=true;
00364         //cout<<"Comparaison: "<<tagname<<"|"<<&(input_string.c_str()[i+2])<<"|"<<strlen(tagname)<<endl;
00365         if(strncmp(tagname,&(input_string.c_str()[i+2]),strlen(tagname))!=0){
00366           //If it is the begining of an other tag
00367           //cout<<"DIFFERENT!"<<endl;
00368           crop_end_idx=i-1;
00369           strip=true;
00370         }
00371         else{
00372           //Otherwise, it is the start of the closing tag of the proprietary tag
00373           proprietary_closing_tag=true;
00374         }
00375       }
00376       else if(proprietary_tag==true){
00377         //It is the start of a new tag, following a proprietary tag
00378         crop_end_idx=i-1;
00379         strip=true;
00380       }
00381     }
00382     else if(input_string.c_str()[i]=='>'){
00383       tag_open=false;
00384       closing_tag_open=false;
00385       tagname[tagname_idx]=0;
00386       tagname_idx=0;
00387       if(proprietary_closing_tag==true){
00388         crop_end_idx=i;
00389         strip=true;
00390       }
00391     }
00392     else if(tag_open==true&&closing_tag_open==false){
00393       if(input_string.c_str()[i]=='.'){
00394         if(proprietary_tag!=true){
00395           orig_tag_open_idx = tag_open_idx;
00396           proprietary_tag=true;
00397         }
00398       }
00399       tagname[tagname_idx]=input_string.c_str()[i];
00400       tagname_idx++;
00401     }
00402     //cerr <<i<<endl;
00403     if(strip==true)
00404       {
00405         input_string.copy(buffer,(crop_end_idx-orig_tag_open_idx)+1,orig_tag_open_idx);
00406         message_out(INFO,"sanitize_proprietary_tags() (end tag or new tag) removed: "+string(buffer));
00407         input_string.erase(orig_tag_open_idx,(crop_end_idx-orig_tag_open_idx)+1);
00408         i=orig_tag_open_idx-1;
00409         proprietary_tag=false;
00410         proprietary_closing_tag=false;
00411         closing_tag_open=false;
00412         tag_open=false;
00413         strip=false;
00414       }
00415 
00416   }//end for
00417   if(proprietary_tag==true){
00418     if(crop_end_idx==0){//no closing tag
00419       crop_end_idx=input_string.size()-1;
00420     }
00421     input_string.copy(buffer,(crop_end_idx-orig_tag_open_idx)+1,orig_tag_open_idx);
00422     message_out(INFO,"sanitize_proprietary_tags() (end of line) removed: "+string(buffer));
00423     input_string.erase(orig_tag_open_idx,(crop_end_idx-orig_tag_open_idx)+1);
00424   }
00425   return input_string;
00426 }
00427 
00428 
00429 
00435 string find_dtd(string dtd_filename)
00436 {
00437   int i;
00438   ifstream dtd_file;
00439   string dtd_path_filename;
00440   bool dtd_found=false;
00441 
00442   for(i=0;i<DTD_SEARCH_PATH_NUM&&dtd_found==false;i++){
00443     dtd_path_filename=DTD_SEARCH_PATH[i];
00444     dtd_path_filename.append(dtd_filename);
00445     dtd_file.clear();
00446     dtd_file.open(dtd_path_filename.c_str());
00447     if(!dtd_file){
00448       message_out(DEBUG,"find_dtd():Unable to open the file "+dtd_path_filename);
00449     }
00450     else{
00451       message_out(STATUS,"find_dtd():DTD found: "+dtd_path_filename);
00452       dtd_file.close();
00453       dtd_found=true;
00454     }
00455   }
00456   if(dtd_found==false){
00457     message_out(ERROR,"find_dtd():Unable to find the DTD named " + dtd_filename);
00458     dtd_path_filename="";
00459   }
00460   return dtd_path_filename;
00461 }
00462 
00463 

Generated on Mon Jan 8 22:35:46 2007 for LibOFX by  doxygen 1.4.7