Example: Links by Condition

In this example, we will demonstrate how to use the InstantAPI.ai Retrieve API endpoint to scrape links from a web page that meet a specific condition you set. This is particularly helpful when recursively scraping pages throughout a website while limiting the crawl to the kinds of pages you need. We will provide a complete request and explain what each part of it does. Additionally, we will show the expected response structure.
Complete Request

Here is a complete request to the Retrieve API endpoint:
cURL
Python
Node.js
JavaScript
Go
Ruby
PHP
# POST the link-extraction request as JSON to the Retrieve API endpoint.
# -L follows redirects, -H sets the request header, -d supplies the body.
curl -L 'https://instantapi.ai/api/retrieve/' \
  -H 'Content-Type: application/json' \
  -d '{
"webpage_url": "https://www.templeandwebster.com.au/Bed-Sheets-C1813085.html?itemsperpage=24&sortby=6&curpage=1",
"api_method_name": "getIndividualProductURLs",
"api_response_structure": "{\"urls\":[{\"url\":\"\"}],\"next_page_url\":\"\"}",
"link_extract": true,
"api_key": "<your api key>"
}'
import requests

# Retrieve API endpoint that receives the scrape request.
url = "https://instantapi.ai/api/retrieve/"

payload = {
    # Page whose links should be extracted.
    "webpage_url": "https://www.templeandwebster.com.au/Bed-Sheets-C1813085.html?itemsperpage=24&sortby=6&curpage=1",
    # User-defined name for this API action.
    "api_method_name": "getIndividualProductURLs",
    # JSON-encoded template of the desired response. The key names must be
    # exact ("urls", "url", "next_page_url") with no padding whitespace inside
    # the quotes, otherwise the request asks for differently named fields.
    "api_response_structure": '{"urls":[{"url":""}],"next_page_url":""}',
    # Turns link extract mode on.
    "link_extract": True,
    "api_key": "<your api key>",
}

headers = {"Content-Type": "application/json"}

# json=payload serializes the dict as the JSON request body.
response = requests.post(url, json=payload, headers=headers)
print(response.text)
const axios = require('axios');

// Retrieve API endpoint and the JSON header sent with the request.
const endpoint = 'https://instantapi.ai/api/retrieve/';
const requestConfig = { headers: { 'Content-Type': 'application/json' } };

// Request body: the page to scrape, a user-defined method name, the desired
// response template (as a JSON string), link extract mode, and the API key.
const payload = {
  webpage_url:
    'https://www.templeandwebster.com.au/Bed-Sheets-C1813085.html?itemsperpage=24&sortby=6&curpage=1',
  api_method_name: 'getIndividualProductURLs',
  api_response_structure: '{"urls":[{"url":""}],"next_page_url":""}',
  link_extract: true,
  api_key: '<your api key>',
};

/**
 * Posts the payload to the Retrieve API and logs `response.data`;
 * any failure is logged with `console.error` instead of being thrown.
 */
async function retrieveLinks() {
  try {
    const response = await axios.post(endpoint, payload, requestConfig);
    console.log(response.data);
  } catch (error) {
    console.error(error);
  }
}

retrieveLinks();
// Retrieve API endpoint that receives the scrape request.
const url = 'https://instantapi.ai/api/retrieve/';

const data = {
  // Page whose links should be extracted.
  webpage_url:
    'https://www.templeandwebster.com.au/Bed-Sheets-C1813085.html?itemsperpage=24&sortby=6&curpage=1',
  // User-defined name for this API action.
  api_method_name: 'getIndividualProductURLs',
  // Template of the desired response, sent as a JSON-encoded string. Building
  // it with JSON.stringify avoids hand-escaping the quotes.
  api_response_structure: JSON.stringify({ urls: [{ url: '' }], next_page_url: '' }),
  // Turns link extract mode on.
  link_extract: true,
  api_key: '<your api key>',
};

fetch(url, {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify(data),
})
  .then(async (response) => {
    // fetch() only rejects on network failures, so an HTTP error status has
    // to be detected explicitly before the body is treated as a JSON result.
    if (!response.ok) {
      const detail = await response.text();
      throw new Error(`Request failed with status ${response.status}: ${detail}`);
    }
    return response.json();
  })
  .then((result) => console.log(result))
  .catch((error) => console.error('Error:', error));
package main
import (
"bytes"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
)
// RequestData is the JSON body sent to the Retrieve API endpoint; the struct
// tags give the field names used in the encoded request.
type RequestData struct {
// WebpageURL is the URL of the web page to scrape.
WebpageURL string `json:"webpage_url"`
// APIMethodName is a user-defined name for the API action.
APIMethodName string `json:"api_method_name"`
// APIResponseStructure is the desired response template, passed as a
// JSON-encoded string.
APIResponseStructure string `json:"api_response_structure"`
// LinkExtract turns link extract mode on when true.
LinkExtract bool `json:"link_extract"`
// APIKey is the caller's API key.
APIKey string `json:"api_key"`
}
// main posts the link-extraction request to the Retrieve API endpoint and
// prints the raw response body. Each failure is reported with fmt.Println and
// ends the run.
func main() {
	const endpoint = "https://instantapi.ai/api/retrieve/"

	payload := RequestData{
		WebpageURL:    "https://www.templeandwebster.com.au/Bed-Sheets-C1813085.html?itemsperpage=24&sortby=6&curpage=1",
		APIMethodName: "getIndividualProductURLs",
		// Raw string literal: same value as the escaped form, without the backslashes.
		APIResponseStructure: `{"urls":[{"url":""}],"next_page_url":""}`,
		LinkExtract:          true,
		APIKey:               "<your api key>",
	}

	encoded, err := json.Marshal(payload)
	if err != nil {
		fmt.Println("Error marshaling JSON:", err)
		return
	}

	request, err := http.NewRequest(http.MethodPost, endpoint, bytes.NewReader(encoded))
	if err != nil {
		fmt.Println("Error creating request:", err)
		return
	}
	request.Header.Set("Content-Type", "application/json")

	httpClient := &http.Client{}
	response, err := httpClient.Do(request)
	if err != nil {
		fmt.Println("Error making request:", err)
		return
	}
	// Release the connection once the body has been consumed.
	defer response.Body.Close()

	raw, err := ioutil.ReadAll(response.Body)
	if err != nil {
		fmt.Println("Error reading response body:", err)
		return
	}

	fmt.Println(string(raw))
}
require 'net/http'
require 'uri'
require 'json'
# Retrieve API endpoint that receives the scrape request.
endpoint = URI("https://instantapi.ai/api/retrieve/")

# Template of the desired response; it is sent as a JSON string nested inside
# the JSON request body.
response_structure = { urls: [{ url: "" }], next_page_url: "" }

payload = {
  webpage_url: "https://www.templeandwebster.com.au/Bed-Sheets-C1813085.html?itemsperpage=24&sortby=6&curpage=1",
  api_method_name: "getIndividualProductURLs",
  api_response_structure: response_structure.to_json,
  link_extract: true,
  api_key: "<your api key>"
}

post = Net::HTTP::Post.new(endpoint, 'Content-Type' => 'application/json')
post.body = payload.to_json

# Open an HTTPS connection, send the request, and keep the reply.
reply = Net::HTTP.start(endpoint.hostname, endpoint.port, use_ssl: true) { |http| http.request(post) }

puts reply.body
// Retrieve API endpoint that receives the scrape request.
$url = 'https://instantapi.ai/api/retrieve/';

// Template of the desired response; JSON-encoded into a string field below.
$responseStructure = [
    'urls' => [
        ['url' => ''],
    ],
    'next_page_url' => '',
];

$data = [
    'webpage_url' => 'https://www.templeandwebster.com.au/Bed-Sheets-C1813085.html?itemsperpage=24&sortby=6&curpage=1',
    'api_method_name' => 'getIndividualProductURLs',
    'api_response_structure' => json_encode($responseStructure),
    'link_extract' => true,
    'api_key' => '<your api key>',
];

$ch = curl_init($url);

// Return the response as a string and POST the payload as a JSON body.
curl_setopt_array($ch, [
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
    CURLOPT_POST => true,
    CURLOPT_POSTFIELDS => json_encode($data),
]);

$response = curl_exec($ch);

// curl_exec() yields false on failure; print the cURL error in that case.
echo ($response === false) ? 'Error: ' . curl_error($ch) : $response;

curl_close($ch);
Request Breakdown

- webpage_url: The URL of the web page you want to scrape.
- api_method_name: A user-defined name for the API action, in this case "getIndividualProductURLs".
- api_response_structure: The expected structure of the API's response, defined by you. This includes placeholders for the details you want to scrape.
- link_extract: Turns link extract mode on.
- api_key: Your API key.

Expected Response Structure

The expected response structure is defined in the api_response_structure parameter. Here is the formatted version for clarity:
{
"urls" : [
{
"url" : ""
}
],
"next_page_url" : ""
}
Example Response

When the request is successfully processed, you can expect a response similar to the following:
{
"urls" : [
{
"url" : "https://www.templeandwebster.com.au/Pure-French-Flax-Linen-Fitted-Sheet-JHLF-TMPL2720.html"
},
{
"url" : "https://www.templeandwebster.com.au/Luxury-1000TC-Cotton-Blend-Sheet-Set-TMPL1635.html"
},
{
"url" : "https://www.templeandwebster.com.au/400TC-Bamboo-and-Cotton-Sheet-Set-SUBSSW-TMPL3570.html"
},
{
"url" : "https://www.templeandwebster.com.au/Lexi-Cotton-Fitted-Sheet-and-Pillowcase-Set-TMPL5488.html"
},
{
"url" : "https://www.templeandwebster.com.au/Vintage-Washed-Sheet-Set-TMPL5294.html"
},
{
"url" : "https://www.templeandwebster.com.au/Gingham-Washed-Cotton-Sheet-Set-GIOI1163.html"
},
{
"url" : "https://www.templeandwebster.com.au/Kirby-Cotton-Flannelette-Sheet-Set-TMPL5823.html"
},
{
"url" : "https://www.templeandwebster.com.au/Laila-Vintage-Washed-Sheet-Set-TMPL6118.html"
},
{
"url" : "https://www.templeandwebster.com.au/Heston-Cotton-Fitted-Sheet-and-Pillowcase-Set-BICN2191.html"
},
{
"url" : "https://www.templeandwebster.com.au/500TC-Bamboo-and-Cotton-Fitted-Sheet-Set-PAVE1096.html"
},
{
"url" : "https://www.templeandwebster.com.au/Organic-Washed-Cotton-Sheet-Set-LALN1002.html"
},
{
"url" : "https://www.templeandwebster.com.au/Washed-Microfibre-Sheet-Set-DRMK1828.html"
},
{
"url" : "https://www.templeandwebster.com.au/Washed-Microfibre-Fitted-Sheet-Set-DRMK1844.html"
},
{
"url" : "https://www.templeandwebster.com.au/400TC-Bamboo-Sheet-Set-NATB1046.html"
},
{
"url" : "https://www.templeandwebster.com.au/Brett-Vintage-Washed-Microfibre-Sheet-Set-CPLY1032.html"
},
{
"url" : "https://www.templeandwebster.com.au/Park-Avenue-Bamboo-and-Cotton-Sheet-Set-PAVE1069.html"
},
{
"url" : "https://www.templeandwebster.com.au/Vienna-Cotton-Fitted-Sheet-with-40cm-Wall-0172-LNHO2697.html"
},
{
"url" : "https://www.templeandwebster.com.au/Heston-300TC-Cotton-Percale-Sheet-Set-BICN2089.html"
},
{
"url" : "https://www.templeandwebster.com.au/Bamboo-and-Microfibre-Sheet-Set-ROLC1179.html"
},
{
"url" : "https://www.templeandwebster.com.au/Harriet-Bamboo-and-Cotton-Fitted-Sheet-Set-GIOI1029.html"
},
{
"url" : "https://www.templeandwebster.com.au/Soft-Touch-Waterproof-Cot-Fitted-Sheet-DRMK1612.html"
},
{
"url" : "https://www.templeandwebster.com.au/European-Flax-Linen-Sheet-Set-70072-NATB1038.html"
},
{
"url" : "https://www.templeandwebster.com.au/1000TC-Egyptian-Cotton-Sheet-Set-LALN1032.html"
},
{
"url" : "https://www.templeandwebster.com.au/Park-Avenue-Egyptian-Cotton-Flannelette-Sheet-Set-PAVE1068.html"
}
],
"next_page_url" : "https://www.templeandwebster.com.au/Bed-Sheets-C1813085.html?itemsperpage=24&sortby=6&curpage=2"
}
By following this example, you can easily use InstantAPI.ai to scrape and transform data from various web pages into structured data tailored to your needs.