Ben Copsey

Added support for images referenced in inline CSS (style attributes), iframes and framesets

@@ -10,7 +10,7 @@ @@ -10,7 +10,7 @@
10 #import "ASIWebPageRequest.h" 10 #import "ASIWebPageRequest.h"
11 #import "ASINetworkQueue.h" 11 #import "ASINetworkQueue.h"
12 12
13 -static xmlChar *xpathExpr = (xmlChar *)"//link[@rel = \"stylesheet\"]/@href|//script/@src|//img/@src"; 13 +static xmlChar *xpathExpr = (xmlChar *)"//link[@rel = \"stylesheet\"]/@href|//script/@src|//img/@src|//frame/@src|//iframe/@src|//*/@style";
14 14
15 static NSLock *xmlParsingLock = nil; 15 static NSLock *xmlParsingLock = nil;
16 static NSMutableArray *requestsUsingXMLParser = nil; 16 static NSMutableArray *requestsUsingXMLParser = nil;
@@ -20,6 +20,7 @@ static NSMutableArray *requestsUsingXMLParser = nil; @@ -20,6 +20,7 @@ static NSMutableArray *requestsUsingXMLParser = nil;
20 - (void)updateResourceURLs; 20 - (void)updateResourceURLs;
21 - (void)parseAsHTML; 21 - (void)parseAsHTML;
22 - (void)parseAsCSS; 22 - (void)parseAsCSS;
  23 ++ (NSArray *)CSSURLsFromString:(NSString *)string;
23 @property (retain, nonatomic) ASINetworkQueue *externalResourceQueue; 24 @property (retain, nonatomic) ASINetworkQueue *externalResourceQueue;
24 @property (retain, nonatomic) NSMutableDictionary *resourceList; 25 @property (retain, nonatomic) NSMutableDictionary *resourceList;
25 @end 26 @end
@@ -62,19 +63,7 @@ static NSMutableArray *requestsUsingXMLParser = nil; @@ -62,19 +63,7 @@ static NSMutableArray *requestsUsingXMLParser = nil;
62 [self failWithError:[NSError errorWithDomain:NetworkRequestErrorDomain code:100 userInfo:[NSDictionary dictionaryWithObjectsAndKeys:@"Unable to read HTML string from response",NSLocalizedDescriptionKey,nil]]]; 63 [self failWithError:[NSError errorWithDomain:NetworkRequestErrorDomain code:100 userInfo:[NSDictionary dictionaryWithObjectsAndKeys:@"Unable to read HTML string from response",NSLocalizedDescriptionKey,nil]]];
63 return; 64 return;
64 } 65 }
65 - NSMutableArray *urls = [NSMutableArray array]; 66 + NSArray *urls = [[self class] CSSURLsFromString:responseCSS];
66 - NSScanner *scanner = [NSScanner scannerWithString:responseCSS];  
67 - [scanner setCaseSensitive:NO];  
68 - while (1) {  
69 - NSString *theURL = nil;  
70 - [scanner scanUpToString:@"url(" intoString:NULL];  
71 - [scanner scanString:@"url(" intoString:NULL];  
72 - [scanner scanUpToString:@")" intoString:&theURL];  
73 - if (!theURL) {  
74 - break;  
75 - }  
76 - [urls addObject:theURL];  
77 - }  
78 67
79 [self setResourceList:[NSMutableDictionary dictionary]]; 68 [self setResourceList:[NSMutableDictionary dictionary]];
80 69
@@ -277,8 +266,16 @@ static NSMutableArray *requestsUsingXMLParser = nil; @@ -277,8 +266,16 @@ static NSMutableArray *requestsUsingXMLParser = nil;
277 int i; 266 int i;
278 for(i = size - 1; i >= 0; i--) { 267 for(i = size - 1; i >= 0; i--) {
279 assert(nodes->nodeTab[i]); 268 assert(nodes->nodeTab[i]);
280 - NSString *theURL = [NSString stringWithCString:(char *)xmlNodeGetContent(nodes->nodeTab[i]) encoding:NSUTF8StringEncoding]; 269 + NSString *nodeName = [NSString stringWithCString:(char *)nodes->nodeTab[i]->name encoding:NSUTF8StringEncoding];
281 - [resourceList setObject:[NSMutableDictionary dictionary] forKey:theURL]; 270 + NSString *value = [NSString stringWithCString:(char *)xmlNodeGetContent(nodes->nodeTab[i]) encoding:NSUTF8StringEncoding];
  271 + if ([[nodeName lowercaseString] isEqualToString:@"style"]) {
  272 + NSArray *externalResources = [[self class] CSSURLsFromString:value];
  273 + for (NSString *theURL in externalResources) {
  274 + [resourceList setObject:[NSMutableDictionary dictionary] forKey:theURL];
  275 + }
  276 + } else {
  277 + [resourceList setObject:[NSMutableDictionary dictionary] forKey:value];
  278 + }
282 if (nodes->nodeTab[i]->type != XML_NAMESPACE_DECL) { 279 if (nodes->nodeTab[i]->type != XML_NAMESPACE_DECL) {
283 nodes->nodeTab[i] = NULL; 280 nodes->nodeTab[i] = NULL;
284 } 281 }
@@ -313,14 +310,30 @@ static NSMutableArray *requestsUsingXMLParser = nil; @@ -313,14 +310,30 @@ static NSMutableArray *requestsUsingXMLParser = nil;
313 int i; 310 int i;
314 for(i = size - 1; i >= 0; i--) { 311 for(i = size - 1; i >= 0; i--) {
315 assert(nodes->nodeTab[i]); 312 assert(nodes->nodeTab[i]);
316 - NSString *theURL = [NSString stringWithCString:(char *)xmlNodeGetContent(nodes->nodeTab[i]) encoding:NSUTF8StringEncoding]; 313 + NSString *nodeName = [NSString stringWithCString:(char *)nodes->nodeTab[i]->name encoding:NSUTF8StringEncoding];
317 - NSData *data = [[resourceList objectForKey:theURL] objectForKey:@"Data"]; 314 + NSString *value = [NSString stringWithCString:(char *)xmlNodeGetContent(nodes->nodeTab[i]) encoding:NSUTF8StringEncoding];
318 - NSString *contentType = [[resourceList objectForKey:theURL] objectForKey:@"ContentType"]; 315 + if ([[nodeName lowercaseString] isEqualToString:@"style"]) {
319 - if (data && contentType) { 316 + NSArray *externalResources = [[self class] CSSURLsFromString:value];
320 - NSString *newData = [NSString stringWithFormat:@"data:%@;base64,",contentType]; 317 + for (NSString *theURL in externalResources) {
321 - newData = [newData stringByAppendingString:[ASIHTTPRequest base64forData:data]]; 318 + NSData *data = [[resourceList objectForKey:theURL] objectForKey:@"Data"];
322 - xmlNodeSetContent(nodes->nodeTab[i], (xmlChar *)[newData cStringUsingEncoding:NSUTF8StringEncoding]); 319 + NSString *contentType = [[resourceList objectForKey:theURL] objectForKey:@"ContentType"];
  320 + if (data && contentType) {
  321 + NSString *newData = [NSString stringWithFormat:@"data:%@;base64,",contentType];
  322 + newData = [newData stringByAppendingString:[ASIHTTPRequest base64forData:data]];
  323 + value = [value stringByReplacingOccurrencesOfString:theURL withString:newData];
  324 + }
  325 + }
  326 + xmlNodeSetContent(nodes->nodeTab[i], (xmlChar *)[value cStringUsingEncoding:NSUTF8StringEncoding]);
  327 + } else {
  328 + NSData *data = [[resourceList objectForKey:value] objectForKey:@"Data"];
  329 + NSString *contentType = [[resourceList objectForKey:value] objectForKey:@"ContentType"];
  330 + if (data && contentType) {
  331 + NSString *newData = [NSString stringWithFormat:@"data:%@;base64,",contentType];
  332 + newData = [newData stringByAppendingString:[ASIHTTPRequest base64forData:data]];
  333 + xmlNodeSetContent(nodes->nodeTab[i], (xmlChar *)[newData cStringUsingEncoding:NSUTF8StringEncoding]);
  334 + }
323 } 335 }
  336 +
324 if (nodes->nodeTab[i]->type != XML_NAMESPACE_DECL) { 337 if (nodes->nodeTab[i]->type != XML_NAMESPACE_DECL) {
325 nodes->nodeTab[i] = NULL; 338 nodes->nodeTab[i] = NULL;
326 } 339 }
@@ -347,7 +360,7 @@ static NSMutableArray *requestsUsingXMLParser = nil; @@ -347,7 +360,7 @@ static NSMutableArray *requestsUsingXMLParser = nil;
347 if (rc >= 0) { 360 if (rc >= 0) {
348 rc = (tidyOptSetBool(tdoc, TidyXmlDecl, yes) ? rc : -1 ); 361 rc = (tidyOptSetBool(tdoc, TidyXmlDecl, yes) ? rc : -1 );
349 rc = (tidyOptSetValue(tdoc, TidyCharEncoding, "utf8") ? rc : -1 ); 362 rc = (tidyOptSetValue(tdoc, TidyCharEncoding, "utf8") ? rc : -1 );
350 - rc = (tidyOptSetValue(tdoc, TidyDoctype, "strict") ? rc : -1 ); 363 + rc = (tidyOptSetValue(tdoc, TidyDoctype, "auto") ? rc : -1 );
351 // Stop tidy stripping HTML 5 tags 364 // Stop tidy stripping HTML 5 tags
352 rc = (tidyOptSetValue(tdoc, TidyBlockTags, "header, section, nav, footer, article, audio, video") ? rc : -1); 365 rc = (tidyOptSetValue(tdoc, TidyBlockTags, "header, section, nav, footer, article, audio, video") ? rc : -1);
353 } 366 }
@@ -383,6 +396,24 @@ static NSMutableArray *requestsUsingXMLParser = nil; @@ -383,6 +396,24 @@ static NSMutableArray *requestsUsingXMLParser = nil;
383 return xhtml; 396 return xhtml;
384 } 397 }
385 398
  399 ++ (NSArray *)CSSURLsFromString:(NSString *)string
  400 +{
  401 + NSMutableArray *urls = [NSMutableArray array];
  402 + NSScanner *scanner = [NSScanner scannerWithString:string];
  403 + [scanner setCaseSensitive:NO];
  404 + while (1) {
  405 + NSString *theURL = nil;
  406 + [scanner scanUpToString:@"url(" intoString:NULL];
  407 + [scanner scanString:@"url(" intoString:NULL];
  408 + [scanner scanUpToString:@")" intoString:&theURL];
  409 + if (!theURL) {
  410 + break;
  411 + }
  412 + [urls addObject:theURL];
  413 + }
  414 + return urls;
  415 +}
  416 +
386 @synthesize externalResourceQueue; 417 @synthesize externalResourceQueue;
387 @synthesize resourceList; 418 @synthesize resourceList;
388 @end 419 @end
@@ -380,7 +380,7 @@ @@ -380,7 +380,7 @@
380 380
381 - (IBAction)fetchWebPage:(id)sender 381 - (IBAction)fetchWebPage:(id)sender
382 { 382 {
383 - ASIWebPageRequest *request = [[[ASIWebPageRequest alloc] initWithURL:[NSURL URLWithString:@"http://allseeing-i.com/ASIHTTPRequest/Who-is-using-it"]] autorelease]; 383 + ASIWebPageRequest *request = [[[ASIWebPageRequest alloc] initWithURL:[NSURL URLWithString:@"http://asi/ASIHTTPRequest/tests/ASIWebPageRequest/index.html"]] autorelease];
384 [request setDidFailSelector:@selector(webPageFetchFailed:)]; 384 [request setDidFailSelector:@selector(webPageFetchFailed:)];
385 [request setDidFinishSelector:@selector(webPageFetchSucceeded:)]; 385 [request setDidFinishSelector:@selector(webPageFetchSucceeded:)];
386 [request setDelegate:self]; 386 [request setDelegate:self];