ASIWebPageRequest.m
15.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
//
// ASIWebPageRequest.m
// Part of ASIHTTPRequest -> http://allseeing-i.com/ASIHTTPRequest
//
// Created by Ben Copsey on 29/06/2010.
// Copyright 2010 All-Seeing Interactive. All rights reserved.
//
// This is an EXPERIMENTAL class - use at your own risk!
#import "ASIWebPageRequest.h"
#import "ASINetworkQueue.h"
// XPath query used by both readResourceURLs and updateResourceURLs. It selects the attributes this class
// treats as references to external resources: the href of <link rel="stylesheet">, the src of
// <script>, <img>, <frame> and <iframe>, plus every inline style attribute (which may contain url(...) values)
static xmlChar *xpathExpr = (xmlChar *)"//link[@rel = \"stylesheet\"]/@href|//script/@src|//img/@src|//frame/@src|//iframe/@src|//*/@style";
// Held while a request parses, queries or rewrites its libxml document, and while requestsUsingXMLParser is modified
static NSLock *xmlParsingLock = nil;
// Requests that currently own a parsed document. xmlInitParser() is called when the first request is added,
// and xmlCleanupParser() when the last one is removed
static NSMutableArray *requestsUsingXMLParser = nil;
// Private interface
@interface ASIWebPageRequest ()
// Runs the xpath query against the parsed document and records each matched URL in resourceList
- (void)readResourceURLs;
// Runs the same xpath query again and replaces each URL we fetched data for with a base64 data: URI
- (void)updateResourceURLs;
// Converts the response to XHTML, parses it, and starts fetching the external resources it references
- (void)parseAsHTML;
// Scans the response for url(...) values and starts fetching the resources they reference
- (void)parseAsCSS;
// Adds newURL (with surrounding whitespace trimmed) to resourceList, unless it is a data: URI
- (void)addURLToFetch:(NSString *)newURL;
// Returns the contents of every url(...) found in string, with surrounding quotes removed
+ (NSArray *)CSSURLsFromString:(NSString *)string;
// Queue used to download the external resources referenced by the response
@property (retain, nonatomic) ASINetworkQueue *externalResourceQueue;
// Keyed by URL string as it appears in the content. Each value is a mutable dictionary that
// receives @"ContentType" and @"Data" entries once the resource has been fetched
@property (retain, nonatomic) NSMutableDictionary *resourceList;
@end
@implementation ASIWebPageRequest
// Creates the lock and the bookkeeping array shared by all requests for coordinating libxml use.
+ (void)initialize
{
	// Only set up the shared state when we are being initialized as ASIWebPageRequest itself
	if (self != [ASIWebPageRequest class]) {
		return;
	}
	xmlParsingLock = [[NSLock alloc] init];
	requestsUsingXMLParser = [[NSMutableArray alloc] init];
}
// Intentionally empty. This class decides for itself when the request is really finished:
// requestFinished, parseAsCSS, parseAsHTML and finishedFetchingExternalResources: each call
// [super markAsFinished] explicitly once there is nothing left to parse or fetch.
// NOTE(review): if the superclass sends -markAsFinished to self on its failure path, a failed
// request would never be marked as finished because of this override - confirm against ASIHTTPRequest.
- (void)markAsFinished
{
}
// Called when the main response has been received.
// HTML and CSS responses are handed to parseAsHTML / parseAsCSS, which take over responsibility for
// finishing the request once external resources have been dealt with. Any other content type
// (or a missing Content-Type header) is finished immediately, untouched.
- (void)requestFinished
{
	webContentType = ASINotParsedWebContentType;

	// Reduce the header to a bare, lowercase mime type: drop any parameters (eg "; charset=utf-8")
	// and the optional whitespace that may surround the separator ("text/html ; charset=utf-8")
	NSString *contentType = [[[self responseHeaders] objectForKey:@"Content-Type"] lowercaseString];
	contentType = [[contentType componentsSeparatedByString:@";"] objectAtIndex:0];
	contentType = [contentType stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]];

	// When there was no Content-Type header, contentType is nil here and none of the comparisons below match
	if ([contentType isEqualToString:@"text/html"] || [contentType isEqualToString:@"text/xhtml"] || [contentType isEqualToString:@"text/xhtml+xml"] || [contentType isEqualToString:@"application/xhtml+xml"]) {
		[self parseAsHTML];
		return;
	} else if ([contentType isEqualToString:@"text/css"]) {
		[self parseAsCSS];
		return;
	}

	// Nothing to parse
	[super requestFinished];
	[super markAsFinished];
}
// Treats the response as a stylesheet: collects every url(...) it references and queues a
// request for each one. finishedFetchingExternalResources: is called when the queue has finished.
// If the stylesheet references nothing, the request is finished straight away.
- (void)parseAsCSS
{
	webContentType = ASICSSWebContentType;

	NSString *responseCSS = [self responseString];
	if (!responseCSS) {
		// This is the CSS path, so say so in the error (the message previously claimed HTML)
		[self failWithError:[NSError errorWithDomain:NetworkRequestErrorDomain code:100 userInfo:[NSDictionary dictionaryWithObjectsAndKeys:@"Unable to read CSS string from response",NSLocalizedDescriptionKey,nil]]];
		return;
	}

	NSArray *urls = [[self class] CSSURLsFromString:responseCSS];
	[self setResourceList:[NSMutableDictionary dictionary]];
	for (NSString *theURL in urls) {
		// Only fetch things that can actually be turned into a URL relative to this stylesheet
		if ([NSURL URLWithString:theURL relativeToURL:[self url]]) {
			[self addURLToFetch:theURL];
		}
	}

	// Nothing to download, so we're done
	if (![[self resourceList] count]) {
		[super requestFinished];
		[super markAsFinished];
		return;
	}

	// Create a new request for every item in the resource list
	[[self externalResourceQueue] cancelAllOperations];
	[self setExternalResourceQueue:[ASINetworkQueue queue]];
	[[self externalResourceQueue] setDelegate:self];
	[[self externalResourceQueue] setQueueDidFinishSelector:@selector(finishedFetchingExternalResources:)];
	[[self externalResourceQueue] setRequestDidFinishSelector:@selector(externalResourceFetchSucceeded:)];
	[[self externalResourceQueue] setRequestDidFailSelector:@selector(externalResourceFetchFailed:)];
	[[self externalResourceQueue] setDownloadProgressDelegate:[self downloadProgressDelegate]];
	for (NSString *theURL in [[self resourceList] keyEnumerator]) {
		ASIWebPageRequest *externalResourceRequest = [ASIWebPageRequest requestWithURL:[NSURL URLWithString:theURL relativeToURL:[self url]]];
		[externalResourceRequest setRequestHeaders:[self requestHeaders]];
		[externalResourceRequest setDownloadCache:[self downloadCache]];
		[externalResourceRequest setCachePolicy:[self cachePolicy]];
		// Remember the URL exactly as it appeared in the stylesheet, so the response can be matched back to it
		[externalResourceRequest setUserInfo:[NSDictionary dictionaryWithObject:theURL forKey:@"Path"]];
		[[self externalResourceQueue] addOperation:externalResourceRequest];
	}
	[[self externalResourceQueue] go];
}
// Removes this request from the list of requests using the XML parser, and shuts the parser
// down if nobody else is using it. Must only be called while xmlParsingLock is held.
- (void)stopUsingXMLParser
{
	[requestsUsingXMLParser removeObject:self];
	if (![requestsUsingXMLParser count]) {
		xmlCleanupParser();
	}
}

// Treats the response as HTML: tidies it into XHTML, parses it with libxml, collects the external
// resources it references and queues a request for each one.
// finishedFetchingExternalResources: is called when the queue has finished, and is responsible for
// releasing the parsed document. On every path that does NOT start the queue, this method releases
// the lock, the parser registration and the document itself.
- (void)parseAsHTML
{
	webContentType = ASIHTMLWebContentType;

	NSString *responseHTML = [self responseString];
	if (!responseHTML) {
		[self failWithError:[NSError errorWithDomain:NetworkRequestErrorDomain code:100 userInfo:[NSDictionary dictionaryWithObjectsAndKeys:@"Unable to read HTML string from response",NSLocalizedDescriptionKey,nil]]];
		return;
	}

	NSError *err = nil;
	responseHTML = [ASIWebPageRequest XHTMLForString:responseHTML error:&err];
	if (err) {
		[self failWithError:err];
		return;
	} else if (!responseHTML) {
		[self failWithError:[NSError errorWithDomain:NetworkRequestErrorDomain code:101 userInfo:[NSDictionary dictionaryWithObjectsAndKeys:@"Unable to convert response string to XHTML",NSLocalizedDescriptionKey,nil]]];
		return;
	}

	// Only allow parsing of a single document at a time
	[xmlParsingLock lock];

	if (![requestsUsingXMLParser count]) {
		xmlInitParser();
	}
	[requestsUsingXMLParser addObject:self];

	// Strip the namespace, because it makes the xpath query a pain
	responseHTML = [responseHTML stringByReplacingOccurrencesOfString:@" xmlns=\"http://www.w3.org/1999/xhtml\"" withString:@""];

	NSData *data = [responseHTML dataUsingEncoding:NSUTF8StringEncoding];

	/* Load XML document */
	doc = xmlParseMemory([data bytes], (int)[data length]);
	if (doc == NULL) {
		// There is no document to free, but we must give up the parser and the lock,
		// otherwise every later request would block forever in [xmlParsingLock lock]
		[self stopUsingXMLParser];
		[xmlParsingLock unlock];
		[self failWithError:[NSError errorWithDomain:NetworkRequestErrorDomain code:101 userInfo:[NSDictionary dictionaryWithObjectsAndKeys:@"Error: unable to parse reponse XML",NSLocalizedDescriptionKey,nil]]];
		return;
	}

	[self setResourceList:[NSMutableDictionary dictionary]];

	// Populate the list of URLS to download
	[self readResourceURLs];

	if ([self error]) {
		// readResourceURLs failed. It has already freed the document and reported the error,
		// so just forget the stale pointer and make sure we don't go on to report success
		doc = NULL;
		[self stopUsingXMLParser];
		[xmlParsingLock unlock];
		return;
	}

	if (![[self resourceList] count]) {
		// Nothing to download. finishedFetchingExternalResources: will never run for this request,
		// so the document has to be released here
		xmlFreeDoc(doc);
		doc = NULL;
		[self stopUsingXMLParser];
		[xmlParsingLock unlock];
		[super requestFinished];
		[super markAsFinished];
		return;
	}

	[xmlParsingLock unlock];

	// Create a new request for every item in the resource list
	[[self externalResourceQueue] cancelAllOperations];
	[self setExternalResourceQueue:[ASINetworkQueue queue]];
	[[self externalResourceQueue] setDelegate:self];
	[[self externalResourceQueue] setQueueDidFinishSelector:@selector(finishedFetchingExternalResources:)];
	[[self externalResourceQueue] setRequestDidFinishSelector:@selector(externalResourceFetchSucceeded:)];
	[[self externalResourceQueue] setRequestDidFailSelector:@selector(externalResourceFetchFailed:)];
	[[self externalResourceQueue] setDownloadProgressDelegate:[self downloadProgressDelegate]];
	for (NSString *theURL in [[self resourceList] keyEnumerator]) {
		ASIWebPageRequest *externalResourceRequest = [ASIWebPageRequest requestWithURL:[NSURL URLWithString:theURL relativeToURL:[self url]]];
		[externalResourceRequest setRequestHeaders:[self requestHeaders]];
		[externalResourceRequest setDownloadCache:[self downloadCache]];
		[externalResourceRequest setCachePolicy:[self cachePolicy]];
		// Remember the URL exactly as it appeared in the page, so the response can be matched back to it
		[externalResourceRequest setUserInfo:[NSDictionary dictionaryWithObject:theURL forKey:@"Path"]];
		[[self externalResourceQueue] addOperation:externalResourceRequest];
	}
	[[self externalResourceQueue] go];
}
// Called by externalResourceQueue (it is the queue's requestDidFinishSelector) each time one external
// resource has been downloaded. Stores the resource's content type and data in its resourceList
// entry, where they are later used to build a data: URI.
- (void)externalResourceFetchSucceeded:(ASIHTTPRequest *)externalResourceRequest
{
	// "Path" is the URL as it appeared in the original content, and is the key into resourceList
	NSString *originalPath = [[externalResourceRequest userInfo] objectForKey:@"Path"];
	if (!originalPath) {
		return;
	}
	NSMutableDictionary *requestResponse = [[self resourceList] objectForKey:originalPath];

	NSString *contentType = [[externalResourceRequest responseHeaders] objectForKey:@"Content-Type"];
	if (!contentType) {
		contentType = @"application/octet-stream";
	}
	[requestResponse setObject:contentType forKey:@"ContentType"];

	// -setObject:forKey: raises if the object is nil, so only record data we actually got.
	// A resource without a "Data" entry is simply left pointing at its original URL
	NSData *responseData = [externalResourceRequest responseData];
	if (responseData) {
		[requestResponse setObject:responseData forKey:@"Data"];
	}
}
// Called by externalResourceQueue (it is the queue's requestDidFailSelector) when an external
// resource could not be downloaded. The failure of the resource becomes the failure of this request.
- (void)externalResourceFetchFailed:(ASIHTTPRequest *)externalResourceRequest
{
	NSError *resourceError = [externalResourceRequest error];
	[self failWithError:resourceError];
}
// Called by externalResourceQueue (it is the queue's queueDidFinishSelector) once every external
// resource request has completed. Rewrites the response so that each resource we fetched is embedded
// as a base64 data: URI, stores the result as UTF-8, and finishes the request.
- (void)finishedFetchingExternalResources:(ASINetworkQueue *)queue
{
	if (webContentType == ASICSSWebContentType) {
		NSMutableString *parsedResponse = [[[self responseString] mutableCopy] autorelease];
		// If fetching a resource failed, leave the stylesheet as it was
		if (![self error]) {
			for (NSString *resource in [[self resourceList] keyEnumerator]) {
				NSDictionary *resourceInfo = [[self resourceList] objectForKey:resource];
				NSData *data = [resourceInfo objectForKey:@"Data"];
				NSString *contentType = [resourceInfo objectForKey:@"ContentType"];
				if (data && contentType) {
					NSString *newData = [NSString stringWithFormat:@"data:%@;base64,",contentType];
					newData = [newData stringByAppendingString:[ASIHTTPRequest base64forData:data]];
					[parsedResponse replaceOccurrencesOfString:resource withString:newData options:0 range:NSMakeRange(0, [parsedResponse length])];
				}
			}
		}
		[self setRawResponseData:(id)[parsedResponse dataUsingEncoding:NSUTF8StringEncoding]];

	} else {
		[xmlParsingLock lock];

		[self updateResourceURLs];

		// Serialize the modified document back into the response
		xmlChar *bytes = NULL;
		int size = 0;
		xmlDocDumpMemory(doc,&bytes,&size);
		[self setRawResponseData:[[[NSMutableData alloc] initWithBytes:bytes length:size] autorelease]];
		// The buffer belongs to us and has been copied into the NSMutableData above, so release it
		if (bytes) {
			xmlFree(bytes);
		}

		xmlFreeDoc(doc);
		doc = NULL;

		// We no longer need the parser; shut it down if no other request is using it
		[requestsUsingXMLParser removeObject:self];
		if (![requestsUsingXMLParser count]) {
			xmlCleanupParser();
		}
		[xmlParsingLock unlock];
	}

	// The content we store is always UTF-8, whatever the server originally sent
	[self setResponseEncoding:NSUTF8StringEncoding];

	// The stored data is the rewritten content rather than what came over the wire, so drop the Content-Encoding header
	NSMutableDictionary *newHeaders = [[[self responseHeaders] mutableCopy] autorelease];
	[newHeaders removeObjectForKey:@"Content-Encoding"];
	[self setResponseHeaders:newHeaders];

	[super requestFinished];

	// Store the rewritten response (rather than the original) in the cache
	[[self downloadCache] storeResponseForRequest:self maxAge:[self secondsToCache]];

	[super markAsFinished];
}
// Runs the xpath query against the parsed document and records every external resource it finds
// in resourceList (via addURLToFetch:). Inline style attributes are scanned for url(...) values.
// If the query cannot be run, the document is freed, doc is reset to NULL and the request is failed.
- (void)readResourceURLs
{
	/* Create xpath evaluation context */
	xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
	if(xpathCtx == NULL) {
		xmlFreeDoc(doc);
		// Don't leave a pointer to freed memory behind
		doc = NULL;
		[self failWithError:[NSError errorWithDomain:NetworkRequestErrorDomain code:101 userInfo:[NSDictionary dictionaryWithObjectsAndKeys:@"Error: unable to create new XPath context",NSLocalizedDescriptionKey,nil]]];
		return;
	}

	/* Evaluate xpath expression */
	xmlXPathObjectPtr xpathObj = xmlXPathEvalExpression(xpathExpr, xpathCtx);
	if(xpathObj == NULL) {
		xmlXPathFreeContext(xpathCtx);
		xmlFreeDoc(doc);
		doc = NULL;
		[self failWithError:[NSError errorWithDomain:NetworkRequestErrorDomain code:101 userInfo:[NSDictionary dictionaryWithObjectsAndKeys:@"Error: unable to evaluate XPath expression!",NSLocalizedDescriptionKey,nil]]];
		return;
	}

	xmlNodeSetPtr nodes = xpathObj->nodesetval;
	int size = (nodes) ? nodes->nodeNr : 0;
	int i;
	for(i = size - 1; i >= 0; i--) {
		xmlNodePtr node = nodes->nodeTab[i];
		assert(node);

		// xmlNodeGetContent() hands us a newly allocated copy that we must free ourselves,
		// and returns NULL when there is no content (which +stringWithCString:encoding: would choke on)
		xmlChar *content = xmlNodeGetContent(node);
		NSString *value = nil;
		if (content) {
			value = [NSString stringWithCString:(char *)content encoding:NSUTF8StringEncoding];
			xmlFree(content);
		}
		NSString *nodeName = nil;
		if (node->name) {
			nodeName = [NSString stringWithCString:(char *)node->name encoding:NSUTF8StringEncoding];
		}

		if (value) {
			if ([[nodeName lowercaseString] isEqualToString:@"style"]) {
				// An inline style can reference any number of resources
				NSArray *externalResources = [[self class] CSSURLsFromString:value];
				for (NSString *theURL in externalResources) {
					[self addURLToFetch:theURL];
				}
			} else {
				// For everything else, the attribute value is the URL
				[self addURLToFetch:value];
			}
		}

		if (node->type != XML_NAMESPACE_DECL) {
			nodes->nodeTab[i] = NULL;
		}
	}

	xmlXPathFreeObject(xpathObj);
	xmlXPathFreeContext(xpathCtx);
}
// Records newURL as an external resource to download, by adding an empty entry for it to resourceList.
// Surrounding whitespace is removed first. Nothing is recorded for nil or empty URLs, or for data: URIs
// (which already contain their content).
- (void)addURLToFetch:(NSString *)newURL
{
	// Get rid of any surrounding whitespace
	newURL = [newURL stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]];

	// Covers both nil and empty strings; neither can be used as a dictionary key for something to fetch
	if (![newURL length]) {
		return;
	}

	// Don't attempt to fetch data URIs
	// An anchored, case-insensitive search is safe for URLs of any length. Taking a 5 character
	// substring is not: it raises NSRangeException for short URLs like "a.js"
	NSRange dataScheme = [newURL rangeOfString:@"data:" options:(NSCaseInsensitiveSearch | NSAnchoredSearch)];
	if (dataScheme.location != NSNotFound) {
		return;
	}

	[[self resourceList] setObject:[NSMutableDictionary dictionary] forKey:newURL];
}
// Runs the xpath query against the parsed document again, and replaces every URL we have downloaded
// data for with a base64 data: URI containing that data. URLs we have no data for are left alone.
// The document itself is owned by the caller (finishedFetchingExternalResources: dumps and frees it
// after calling this method), so it is never freed here, even when the query fails.
- (void)updateResourceURLs
{
	/* Create xpath evaluation context */
	xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
	if(xpathCtx == NULL) {
		// Freeing doc here would make the caller dump and free an already freed document
		[self failWithError:[NSError errorWithDomain:NetworkRequestErrorDomain code:101 userInfo:[NSDictionary dictionaryWithObjectsAndKeys:@"Error: unable to create new XPath context",NSLocalizedDescriptionKey,nil]]];
		return;
	}

	/* Evaluate xpath expression */
	xmlXPathObjectPtr xpathObj = xmlXPathEvalExpression(xpathExpr, xpathCtx);
	if(xpathObj == NULL) {
		xmlXPathFreeContext(xpathCtx);
		[self failWithError:[NSError errorWithDomain:NetworkRequestErrorDomain code:101 userInfo:[NSDictionary dictionaryWithObjectsAndKeys:@"Error: unable to evaluate XPath expression!",NSLocalizedDescriptionKey,nil]]];
		return;
	}

	xmlNodeSetPtr nodes = xpathObj->nodesetval;
	int size = (nodes) ? nodes->nodeNr : 0;
	int i;
	for(i = size - 1; i >= 0; i--) {
		xmlNodePtr node = nodes->nodeTab[i];
		assert(node);

		// xmlNodeGetContent() hands us a newly allocated copy that we must free ourselves,
		// and returns NULL when there is no content (which +stringWithCString:encoding: would choke on)
		xmlChar *content = xmlNodeGetContent(node);
		NSString *value = nil;
		if (content) {
			value = [NSString stringWithCString:(char *)content encoding:NSUTF8StringEncoding];
			xmlFree(content);
		}
		NSString *nodeName = nil;
		if (node->name) {
			nodeName = [NSString stringWithCString:(char *)node->name encoding:NSUTF8StringEncoding];
		}

		if (value) {
			if ([[nodeName lowercaseString] isEqualToString:@"style"]) {
				// Swap each url(...) in the inline style that we managed to download for its data URI
				NSArray *externalResources = [[self class] CSSURLsFromString:value];
				for (NSString *theURL in externalResources) {
					NSDictionary *resourceInfo = [resourceList objectForKey:theURL];
					NSData *data = [resourceInfo objectForKey:@"Data"];
					NSString *contentType = [resourceInfo objectForKey:@"ContentType"];
					if (data && contentType) {
						NSString *newData = [NSString stringWithFormat:@"data:%@;base64,",contentType];
						newData = [newData stringByAppendingString:[ASIHTTPRequest base64forData:data]];
						value = [value stringByReplacingOccurrencesOfString:theURL withString:newData];
					}
				}
				xmlNodeSetContent(node, (xmlChar *)[value cStringUsingEncoding:NSUTF8StringEncoding]);
			} else {
				// The attribute value is the URL, so replace the whole thing
				NSDictionary *resourceInfo = [resourceList objectForKey:value];
				NSData *data = [resourceInfo objectForKey:@"Data"];
				NSString *contentType = [resourceInfo objectForKey:@"ContentType"];
				if (data && contentType) {
					NSString *newData = [NSString stringWithFormat:@"data:%@;base64,",contentType];
					newData = [newData stringByAppendingString:[ASIHTTPRequest base64forData:data]];
					xmlNodeSetContent(node, (xmlChar *)[newData cStringUsingEncoding:NSUTF8StringEncoding]);
				}
			}
		}

		if (node->type != XML_NAMESPACE_DECL) {
			nodes->nodeTab[i] = NULL;
		}
	}

	xmlXPathFreeObject(xpathObj);
	xmlXPathFreeContext(xpathCtx);
}
// Runs inputHTML through tidy and returns the result as an XHTML string (UTF-8, with an XML declaration).
// Returns nil on failure. When error is not NULL, *error is set to an error in NetworkRequestErrorDomain
// (code 102) describing the failure; it is not touched on success.
+ (NSString *)XHTMLForString:(NSString *)inputHTML error:(NSError **)error
{
	const char* input = [inputHTML cStringUsingEncoding:NSUTF8StringEncoding];
	if (!input) {
		// No string (or one that can't be represented as UTF-8) - there is nothing we could hand to tidy
		if (error) {
			*error = [NSError errorWithDomain:NetworkRequestErrorDomain code:102 userInfo:[NSDictionary dictionaryWithObjectsAndKeys:@"Failed to tidy HTML because the input could not be read as UTF-8",NSLocalizedDescriptionKey,nil]];
		}
		return nil;
	}

	TidyBuffer output = {0,0,0,0};
	TidyBuffer errbuf = {0,0,0,0};
	int rc = -1;
	Bool ok;

	TidyDoc tdoc = tidyCreate();

	// Each step below only runs if every step before it succeeded (rc >= 0)
	ok = tidyOptSetBool(tdoc, TidyXhtmlOut, yes);
	if (ok) {
		rc = tidySetErrorBuffer(tdoc, &errbuf);
	}
	if (rc >= 0) {
		rc = (tidyOptSetBool(tdoc, TidyXmlDecl, yes) ? rc : -1 );
		rc = (tidyOptSetValue(tdoc, TidyCharEncoding, "utf8") ? rc : -1 );
		rc = (tidyOptSetValue(tdoc, TidyDoctype, "auto") ? rc : -1 );
		// Stop tidy stripping HTML 5 tags
		rc = (tidyOptSetValue(tdoc, TidyBlockTags, "header, section, nav, footer, article, audio, video") ? rc : -1);
	}
	if (rc >= 0) {
		rc = tidyParseString(tdoc, input);
	}
	if (rc >= 0) {
		rc = tidyCleanAndRepair(tdoc);
	}
	if (rc >= 0) {
		rc = tidyRunDiagnostics(tdoc);
	}
	// The document had errors: ask tidy to produce output anyway
	if (rc > 1) {
		rc = (tidyOptSetBool(tdoc, TidyForceOutput, yes) ? rc : -1 );
	}
	if (rc >= 0) {
		rc = tidySaveBuffer(tdoc, &output);
	}

	NSString *xhtml = nil;
	if (rc < 0) {
		if (error) {
			*error = [NSError errorWithDomain:NetworkRequestErrorDomain code:102 userInfo:[NSDictionary dictionaryWithObjectsAndKeys:[NSString stringWithFormat:@"Failed to tidy HTML with error code %d",rc],NSLocalizedDescriptionKey,nil]];
		}
	} else {
		xhtml = [[[NSString alloc] initWithBytes:output.bp length:output.size encoding:NSUTF8StringEncoding] autorelease];
	}

	// Release tidy's buffers and document whether or not we succeeded
	tidyBufFree(&output);
	tidyBufFree(&errbuf);
	tidyRelease(tdoc);

	return xhtml;
}
// Returns an array containing the contents of every url(...) found in string, in the order they appear.
// The search for "url(" is case insensitive. Whitespace and quotes surrounding each URL are removed,
// and empty URLs are left out. Returns an empty array when string is nil or contains no URLs.
+ (NSArray *)CSSURLsFromString:(NSString *)string
{
	NSMutableArray *urls = [NSMutableArray array];
	if (![string length]) {
		return urls;
	}

	// Whitespace and quotes can be mixed around the URL, as in url( 'image.png' ),
	// so they have to be trimmed in one go rather than one after the other
	NSMutableCharacterSet *charactersToTrim = [[[NSCharacterSet whitespaceAndNewlineCharacterSet] mutableCopy] autorelease];
	[charactersToTrim addCharactersInString:@"\"'"];

	NSScanner *scanner = [NSScanner scannerWithString:string];
	[scanner setCaseSensitive:NO];
	while (![scanner isAtEnd]) {
		// Move to just after the next "url(" - if there isn't one, we're done
		[scanner scanUpToString:@"url(" intoString:NULL];
		if (![scanner scanString:@"url(" intoString:NULL]) {
			break;
		}

		// Nothing is scanned for an empty url(). That isn't a reason to stop looking:
		// there may be more URLs further on
		NSString *theURL = nil;
		if (![scanner scanUpToString:@")" intoString:&theURL]) {
			continue;
		}

		theURL = [theURL stringByTrimmingCharactersInSet:charactersToTrim];
		if ([theURL length]) {
			[urls addObject:theURL];
		}
	}
	return urls;
}
// Accessors for the private properties declared in the class extension at the top of this file
@synthesize externalResourceQueue;
@synthesize resourceList;
@end