/* GOPHER ACCESS HTGopher.c ** ============= ** ** History: ** 26 Sep 90 Adapted from other accesses (News, HTTP) TBL ** 29 Nov 91 Downgraded to C, for portable implementation. */ #include "../config.h" /* Implements: */ #include "HTGopher.h" #define GOPHER_PORT 70 /* See protocol spec */ #define BIG 1024 /* Bug */ #define LINE_LENGTH 256 /* Bug */ /* Gopher entity types: */ #define GOPHER_TEXT '0' #define GOPHER_MENU '1' #define GOPHER_CSO '2' #define GOPHER_ERROR '3' #define GOPHER_MACBINHEX '4' #define GOPHER_PCBINHEX '5' #define GOPHER_UUENCODED '6' #define GOPHER_INDEX '7' #define GOPHER_TELNET '8' #define GOPHER_BINARY '9' #define GOPHER_DUPLICATE '+' #define GOPHER_GIF 'g' #define GOPHER_IMAGE 'I' #define GOPHER_TN3270 'T' #define GOPHER_HTML 'h' /* HTML */ #define GOPHER_WWW 'w' /* W3 address */ #define GOPHER_SOUND 's' #define GOPHER_PLUS_IMAGE ':' #define GOPHER_PLUS_MOVIE ';' #define GOPHER_PLUS_SOUND '<' #define GOPHER_INFO 'i' #include #include "HTUtils.h" /* Coding convention macros */ #include "tcp.h" #include "HTAlert.h" #include "HTParse.h" #include "HTFormat.h" #include "HTFile.h" #include "HTTCP.h" /* Hypertext object building machinery */ #include "HTML.h" #define PUTC(c) (*targetClass.put_character)(target, c) #define PUTS(s) (*targetClass.put_string)(target, s) #define START(e) (*targetClass.start_element)(target, e, 0, 0) #define END(e) (*targetClass.end_element)(target, e) #define END_TARGET (*targetClass.end_document)(target) #define FREE_TARGET (*targetClass.free)(target) struct _HTStructured { WWW_CONST HTStructuredClass * isa; /* ... 
*/ }; #ifndef DISABLE_TRACE extern int www2Trace; #endif PRIVATE HTStructured *target; /* the new hypertext */ PRIVATE HTStructuredClass targetClass; /* Its action routines */ /* Module-wide variables */ PRIVATE int s; /* Socket for GopherHost */ /* Matrix of allowed characters in filenames ** ----------------------------------------- */ PRIVATE BOOL acceptable[256]; PRIVATE BOOL acceptable_inited = NO; PRIVATE void init_acceptable NOARGS { unsigned int i; char * good = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789./-_$"; for(i=0; i<256; i++) acceptable[i] = NO; for(;*good; good++) acceptable[(unsigned int)*good] = YES; acceptable_inited = YES; } PRIVATE WWW_CONST char hex[17] = "0123456789abcdef"; /* Decode one hex character */ PRIVATE char from_hex ARGS1(char, c) { return (c>='0')&&(c<='9') ? c-'0' : (c>='A')&&(c<='F') ? c-'A'+10 : (c>='a')&&(c<='f') ? c-'a'+10 : 0; } /* Paste in an Anchor ** ------------------ ** ** The title of the destination is set, as there is no way ** of knowing what the title is when we arrive. ** ** On entry, ** HT is in append mode. ** text points to the text to be put into the file, 0 terminated. ** addr points to the hypertext refernce address 0 terminated. */ PRIVATE void write_anchor ARGS3(WWW_CONST char *,text, WWW_CONST char *,addr, char *, image_text) { PUTS (""); /* Throw in an inlined image, if one has been requested. */ if (image_text) { PUTS (" "); } PUTS(text); PUTS(""); } PRIVATE void write_non_anchor ARGS2(WWW_CONST char *,text, char *, image_text) { /* Throw in an inlined image, if one has been requested. 
*/ if (image_text) { PUTS (" "); } PUTS(text); } /* Parse a Gopher Menu document ** ============================ ** */ PRIVATE int parse_menu ARGS2 ( WWW_CONST char *, arg, HTParentAnchor *, anAnchor) { char gtype; char ch; char line[BIG]; char address[BIG]; char *name, *selector; /* Gopher menu fields */ char *host; char *port; char *p = line; extern int interrupted_in_htgetcharacter; WWW_CONST char *title; #define TAB '\t' #define HEX_ESCAPE '%' HTProgress ("Retrieving Gopher menu."); PUTS("

Gopher Menu

\n"); START(HTML_DL); while ((ch=HTGetCharacter ()) != (char)EOF) { if (interrupted_in_htgetcharacter) { #ifndef DISABLE_TRACE if (www2Trace) fprintf (stderr, "parse_menu: picked up interrupt in htgc\n"); #endif (*targetClass.handle_interrupt)(target); return HT_INTERRUPTED; } if (ch != LF) { *p = ch; /* Put character in line */ if (p< &line[BIG-1]) p++; } else { *p++ = 0; /* Terminate line */ p = line; /* Scan it to parse it */ port = 0; /* Flag "not parsed" */ #ifndef DISABLE_TRACE if (www2Trace) fprintf(stderr, "HTGopher: Menu item: %s\n", line); #endif gtype = *p++; /* Break on line with a dot by itself */ if ((gtype=='.') && ((*p=='\r') || (*p==0))) break; if (gtype && *p) { name = p; selector = strchr(name, TAB); START(HTML_DD); if (selector) { *selector++ = 0; /* Terminate name */ host = strchr(selector, TAB); if (host) { *host++ = 0; /* Terminate selector */ port = strchr(host, TAB); if (port) { char *junk; port[0] = ':'; /* delimit host a la W3 */ junk = strchr(port, TAB); if (junk) *junk++ = 0; /* Chop port */ if ((port[1]=='0') && (!port[2])) port[0] = 0; /* 0 means none */ } /* no port */ } /* host ok */ } /* selector ok */ } /* gtype and name ok */ if (gtype == GOPHER_WWW) { /* Gopher pointer to W3 */ write_anchor(name, selector, "internal-gopher-text"); } else if (port) { /* Other types need port */ if (gtype == GOPHER_TELNET) { if (*selector) sprintf(address, "telnet://%s@%s/", selector, host); else sprintf(address, "telnet://%s/", host); } else if (gtype == GOPHER_TN3270) { if (*selector) sprintf(address, "tn3270://%s@%s/", selector, host); else sprintf(address, "tn3270://%s/", host); } else { /* If parsed ok */ char *q; unsigned char *p; sprintf(address, "//%s/%c", host, gtype); q = address+ strlen(address); for(p=(unsigned char *)selector; *p; p++) { /* Encode selector string */ if (acceptable[*p]) *q++ = *p; else { *q++ = HEX_ESCAPE; /* Means hex coming */ *q++ = hex[(*p) >> 4]; *q++ = hex[(*p) & 15]; } } *q++ = 0; /* terminate address */ } /* 
Error response from Gopher doesn't deserve to be a hyperlink. */ if (strcmp (address, "//error.host:1/0") != 0 && strcmp (address, "//error/0error") != 0 && strcmp (address, "//:/0") != 0 && gtype != GOPHER_ERROR) { switch (gtype) { case GOPHER_MENU: write_anchor(name, address, "internal-gopher-menu"); break; case GOPHER_TEXT: write_anchor(name, address, "internal-gopher-text"); break; case GOPHER_INDEX: case GOPHER_CSO: write_anchor(name, address, "internal-gopher-index"); break; case GOPHER_IMAGE: case GOPHER_GIF: case GOPHER_PLUS_IMAGE: write_anchor(name, address, "internal-gopher-image"); break; case GOPHER_SOUND: case GOPHER_PLUS_SOUND: write_anchor(name, address, "internal-gopher-sound"); break; case GOPHER_PLUS_MOVIE: write_anchor(name, address, "internal-gopher-movie"); break; case GOPHER_TELNET: case GOPHER_TN3270: write_anchor(name, address, "internal-gopher-telnet"); break; case GOPHER_BINARY: case GOPHER_MACBINHEX: case GOPHER_PCBINHEX: case GOPHER_UUENCODED: write_anchor(name, address, "internal-gopher-binary"); break; case GOPHER_INFO: write_non_anchor(name, NULL); break; default: write_anchor(name, address, "internal-gopher-unknown"); break; } } else { /* Good error handling??? */ PUTS(line); } } else { /* parse error */ #ifndef DISABLE_TRACE if (www2Trace) fprintf(stderr, "HTGopher: Bad menu item.\n"); #endif PUTS(line); } /* parse error */ p = line; /* Start again at beginning of line */ } /* if end of line */ } /* Loop over characters */ if (interrupted_in_htgetcharacter) { #ifndef DISABLE_TRACE if (www2Trace) fprintf (stderr, "parse_menu: picked up interrupt in htgc\n"); #endif (*targetClass.handle_interrupt)(target); return HT_INTERRUPTED; } END(HTML_DL); END_TARGET; FREE_TARGET; HTProgress ("Retrieved Gopher menu."); return 1; } /* Display a Gopher Index document ** ------------------------------- */ PRIVATE void display_index ARGS2 ( WWW_CONST char *, arg, HTParentAnchor *,anAnchor) { PUTS("

Searchable Gopher Index

"); END_TARGET; FREE_TARGET; return; } /* Display a Gopher CSO document ** ----------------------------- */ PRIVATE void display_cso ARGS2 ( WWW_CONST char *, arg, HTParentAnchor *,anAnchor) { PUTS("

Searchable CSO Phonebook

"); END_TARGET; FREE_TARGET; return; } /* Parse a Gopher CSO document ** ============================ ** ** Accepts an open socket to a CSO server waiting to send us ** data and puts it on the screen in a reasonable manner. ** ** Perhaps this data can be automatically linked to some ** other source as well??? ** ** Hacked into place by Lou Montulli@ukanaix.cc.ukans.edu ** */ PRIVATE int parse_cso ARGS2 (WWW_CONST char *, arg, HTParentAnchor *,anAnchor) { char ch; char line[BIG]; char *p = line; char *second_colon, last_char='\0'; extern int interrupted_in_htgetcharacter; HTProgress ("Retrieving CSO search results."); PUTS("

CSO Search Results

\n
");

  /* start grabbing chars from the network */
  while ((ch=HTGetCharacter ()) != (char)EOF) 
    {
      if (interrupted_in_htgetcharacter)
        {
#ifndef DISABLE_TRACE
          if (www2Trace)
            fprintf (stderr, "parse_cso: picked up interrupt in htgc\n");
#endif
          (*targetClass.handle_interrupt)(target);
          return HT_INTERRUPTED;
        }
      if (ch != '\n') 
        {
          *p = ch;		/* Put character in line */
          if (p< &line[BIG-1]) p++;
        } 
      else 
        {
          *p++ = 0;		/* Terminate line */
          p = line;		/* Scan it to parse it */

	/* OK we now have a line in 'p' lets parse it and print it */
          
          /* Break on line that begins with a 2. It's the end of
           * data.
	   */
          if (*p == '2')
		break;

	  /*  lines beginning with 5 are errors, 
	   *  print them and quit
	   */
          if (*p == '5') {
            START(HTML_H2);
            PUTS(p+4);
            END(HTML_H2);
            break;
          }

	  if(*p == '-') {
	     /*  data lines look like  -200:#:
              *  where # is the search result number and can be multiple 
	      *  digits (infinate?)
              *  find the second colon and check the digit to the
              *  left of it to see if they are diferent
              *  if they are then a different person is starting. 
	      *  make this line an 

*/ /* find the second_colon */ second_colon = strchr( strchr(p,':')+1, ':'); if(second_colon != NULL) { /* error check */ if (*(second_colon-1) != last_char) /* print seperator */ { END(HTML_PRE); START(HTML_H2); } /* right now the record appears with the alias (first line) * as the header and the rest as
 text
		  * It might look better with the name as the
		  * header and the rest as a 
    with
  • tags * I'm not sure whether the name field comes in any * special order or if its even required in a record, * so for now the first line is the header no matter * what it is (it's almost always the alias) * A
    with the first line as the
    and * the rest as some form of
    might good also? */ /* print data */ PUTS(second_colon+1); PUTS("\n"); if (*(second_colon-1) != last_char) /* end seperator */ { END(HTML_H2); START(HTML_PRE); } /* save the char before the second colon * for comparison on the next pass */ last_char = *(second_colon-1) ; } /* end if second_colon */ } /* end if *p == '-' */ } /* if end of line */ } /* Loop over characters */ if (interrupted_in_htgetcharacter) { #ifndef DISABLE_TRACE if (www2Trace) fprintf (stderr, "parse_cso: picked up interrupt in htgc\n"); #endif (*targetClass.handle_interrupt)(target); return HT_INTERRUPTED; } /* end the text block */ PUTS("\n
    ");
      END_TARGET;
      FREE_TARGET;
    
      HTProgress ("Retrieved CSO search results.");
    
      return 1;  /* all done */
    } /* end of procedure */
    
    
    
    /*		De-escape a selector into a command
    **		-----------------------------------
    **
    **	The % hex escapes are converted (by HTUnEscape, on a private copy
    **	so that the caller's selector is left untouched).  Otherwise, the
    **	string is copied as is.
    **
    ** On entry,
    **	command		points to a buffer supplied by the caller; it must be
    **			large enough for the de-escaped selector plus its
    **			terminating NUL.
    **	selector	points to the escaped selector, 0 terminated.
    **
    ** On exit,
    **	command		holds the de-escaped selector.  If either argument is
    **			NULL nothing is written.  If the temporary copy cannot
    **			be allocated, command is set to the empty string so
    **			that the caller never reads an uninitialised buffer.
    */
    PRIVATE void de_escape ARGS2(char *, command, WWW_CONST char *, selector)
    {
      char *copy;

      if (!selector || !command)
        return;

      copy = strdup (selector);
      if (!copy)
        {
          /* Out of memory: there is no error channel (void return), so
           * leave a well-defined, empty command instead of crashing in
           * HTUnEscape / strcpy on a NULL pointer. */
          command[0] = '\0';
          return;
        }

      HTUnEscape (copy);
      strcpy (command, copy);
      free (copy);
    }
    
    
    /*		Load by name					HTLoadGopher
    **		============
    **
    **	Builds the Gopher request for the URL in `arg', sends it over a new
    **	connection and hands the reply to the parser matching the Gopher
    **	item type found in the URL path.
    **
    ** On entry,
    **	arg		the URL to load, 0 terminated.
    **	anAnchor	the anchor the loaded object belongs to.
    **	format_out	the output format requested by the caller.
    **	sink		the stream receiving the converted data.
    **
    ** On exit,
    **	returns		-3 if arg is NULL, -2 if arg is empty,
    **			HT_LOADED for a locally generated index/CSO "cover
    **			page" and after the reply has been processed,
    **			HT_INTERRUPTED if the connect or the transfer was
    **			interrupted,
    **			HT_NOT_LOADED if the path could not be parsed, memory
    **			ran out, the connection failed or the command could
    **			not be written.
    **
    **	 Bug:	No decoding of strange data types as yet.
    **
    */
    PUBLIC int HTLoadGopher ARGS4(
            char *,             arg,
            HTParentAnchor *,   anAnchor,
            HTFormat,           format_out,
            HTStream*,          sink)
    {
      extern int tweak_gopher_types;
      char *command = NULL;         /* The whole command, CR LF terminated */
      char *p1;                     /* Path part of arg, owned by us */
      char *selector;               /* Selector string (points into p1) */
      char *query;                  /* Search string (points into p1) */
      char gtype = GOPHER_MENU;     /* Gopher node type; default = menu */
      int status;                   /* tcp return */
      int rv = 0;
      int compressed;
      HTAtom *enc;
      HTFormat guess;               /* Format assumed for the reply */

      if (!acceptable_inited) init_acceptable();

      if (!arg)
        return -3;                  /* Bad if no name specified */
      if (!*arg)
        return -2;                  /* Bad if name had zero length */

    #ifndef DISABLE_TRACE
      if (www2Trace) fprintf(stderr, "HTGopher: Looking for %s\n", arg);
    #endif

      /* Get entity type, and selector string.
       */
      p1 = HTParse(arg, "", PARSE_PATH|PARSE_PUNCTUATION);
      if (!p1)
        return HT_NOT_LOADED;

      selector = p1;
      /* Skip the first character (normally the leading slash) and pick up
       * the type character after a slash.  An empty path is left alone:
       * stepping over its terminating NUL would make every later string
       * operation on selector read outside the buffer. */
      if (*selector && (*selector++ == '/') && *selector)
        gtype = *selector++;

      if (gtype == GOPHER_INDEX || gtype == GOPHER_CSO)
        {
          query = strchr(selector, '?');    /* Look for search string */
          if (!query || !query[1])
            {
              /* No search required: display the "cover page".
               * Local function only, so release the parsed path first. */
              free(p1);
              target = HTML_new(anAnchor, format_out, sink);
              targetClass = *target->isa;
              if (gtype == GOPHER_INDEX)
                display_index(arg, anAnchor);
              else
                display_cso(arg, anAnchor);
              return HT_LOADED;
            }

          *query++ = 0;                     /* Skip '?' */
          HTUnEscape (query);

          if (gtype == GOPHER_INDEX)
            {
              /* selector TAB query CR LF NUL */
              command = malloc(strlen(selector) + 1 + strlen(query) + 2 + 1);
              if (command)
                {
                  de_escape(command, selector);
                  strcat(command, "\t");
                  strcat(command, query);
                }
            }
          else
            {
              /* "query " query CR LF NUL.  The selector is not part of a
               * CSO request, so it must not be copied into this buffer,
               * which is sized for the query only. */
              command = malloc(strlen("query ") + strlen(query) + 2 + 1);
              if (command)
                {
                  strcpy(command, "query ");
                  strcat(command, query);
                }
            }
        }
      else
        {                                   /* Not index */
          /* selector CR LF NUL */
          command = malloc(strlen(selector) + 2 + 1);
          if (command)
            de_escape(command, selector);
        }
      free(p1);

      if (!command)
        return HT_NOT_LOADED;               /* Out of memory */

      /* Patch security hole: a CR or LF smuggled in through a % escape
       * would let the URL inject extra protocol lines. */
      {
        char *tmp;
        for (tmp = command; *tmp; tmp++)
          if (*tmp == CR || *tmp == LF)
            *tmp = ' ';
    #ifndef DISABLE_TRACE
        if (www2Trace)
          fprintf (stderr, "Fixed security hole: '%s'\n", command);
    #endif
        /* Room for these three bytes was reserved by every malloc above. */
        *tmp++ = CR;
        *tmp++ = LF;
        *tmp = 0;
    #ifndef DISABLE_TRACE
        if (www2Trace)
          fprintf (stderr, "Prepared command: '%s'\n", command);
    #endif
      }

      /* 70 is the default Gopher port. */
      status = HTDoConnect (arg, "Gopher", 70, &s);
      if (status == HT_INTERRUPTED)
        {
          /* Interrupt cleanly. */
    #ifndef DISABLE_TRACE
          if (www2Trace)
            fprintf (stderr,
                     "Gopher: Interrupted on connect; recovering cleanly.\n");
    #endif
          HTProgress ("Connection interrupted.");
          free(command);
          return HT_INTERRUPTED;
        }
      if (status<0)
        {
    #ifndef DISABLE_TRACE
          if (www2Trace)
            fprintf(stderr,
                    "HTGopher: Unable to connect to remote host for `%s'.\n",
                    arg);
    #endif
          free(command);
          return HT_NOT_LOADED;
        }

      HTInitInput(s);               /* Set up input buffering */

    #ifndef DISABLE_TRACE
      if (www2Trace)
        fprintf(stderr,
                "HTGopher: Connected, writing command `%s' to socket %d\n",
                command, s);
    #endif

      status = NETWRITE(s, command, (int)strlen(command));
      free(command);
      if (status<0)
        {
    #ifndef DISABLE_TRACE
          if (www2Trace) fprintf(stderr, "HTGopher: Unable to send command.\n");
    #endif
          NETCLOSE (s);
          return HT_NOT_LOADED;
        }

      /* Now read the data from the socket: */
      switch (gtype)
        {
        case GOPHER_MENU:
        case GOPHER_INDEX:
          target = HTML_new(anAnchor, format_out, sink);
          targetClass = *target->isa;
          rv = parse_menu(arg, anAnchor);
          break;

        case GOPHER_CSO:
          target = HTML_new(anAnchor, format_out, sink);
          targetClass = *target->isa;
          rv = parse_cso(arg, anAnchor);
          break;

        default:
          /* Everything else is streamed through HTParseSocket; only the
           * format assumed for the data depends on the Gopher type. */
          switch (gtype)
            {
            case GOPHER_MACBINHEX:
            case GOPHER_PCBINHEX:
            case GOPHER_UUENCODED:
            case GOPHER_BINARY:
              guess = WWW_BINARY;
              break;

            case GOPHER_GIF:
            case GOPHER_IMAGE:
            case GOPHER_PLUS_IMAGE:
              guess = HTAtom_for ("image/gif");
              break;

            case GOPHER_SOUND:
            case GOPHER_PLUS_SOUND:
              guess = HTAtom_for ("audio/basic");
              break;

            case GOPHER_PLUS_MOVIE:
              /* Sigh..... */
              guess = HTAtom_for ("video/mpeg");
              break;

            case GOPHER_HTML:
              guess = WWW_HTML;
              break;

            case GOPHER_TEXT:
            default:                /* @@ parse as plain text */
              guess = WWW_PLAINTEXT;
              break;
            }

          /* With tweak_gopher_types set, HTFileFormat is asked to pick the
           * format from arg, with the type-based guess as its default. */
          if (tweak_gopher_types)
            guess = HTFileFormat (arg, &enc, guess, &compressed);

          rv = HTParseSocket(guess, format_out, anAnchor, s, sink, 0);
          break;
        } /* switch(gtype) */

      NETCLOSE(s);
      if (rv == HT_INTERRUPTED)
        {
          HTProgress ("Connection interrupted.");
          return HT_INTERRUPTED;
        }

      return HT_LOADED;
    }
    
    /* Protocol descriptor for the "gopher" access scheme: pairs the scheme
     * name with HTLoadGopher as its load routine.  The last member is left
     * NULL (its meaning is defined by HTProtocol, declared elsewhere). */
    PUBLIC HTProtocol HTGopher = {
      "gopher",
      HTLoadGopher,
      NULL
    };