How can I extract URLs from a particular domain from an HTML document using PHP and a regular expression?

  php, regex, url
/**
 * Extracts absolute http(s) URLs from a piece of text such as an HTML document.
 *
 * The text is supplied by the caller; this class does not fetch anything itself.
 */
class Scraper
{
    /** @var string|mixed Text (typically HTML) that is scanned for URLs. */
    private $resource;

    /** @var string Domain used for filtering, e.g. "https://stackoverflow.com" or "stackoverflow.com". */
    private $domain;

    /**
     * @param string $domain   Domain to filter on, with or without a scheme.
     * @param string $resource Text to scan; a non-string value (for example the
     *                         `false` returned by a failed file_get_contents())
     *                         is treated as an empty document.
     */
    public function __construct($domain, $resource)
    {
        $this->domain = $domain;
        $this->resource = $resource;
    }

    /**
     * Returns the URLs found in the resource, in document order (duplicates kept).
     *
     * @param bool $sameDomainOnly When true, only URLs whose host equals the host
     *                             of the configured domain are returned. The
     *                             comparison is case-insensitive and exact, so
     *                             sub-domains are not included. Defaults to false,
     *                             which returns every URL found.
     *
     * @return string[] List of matched URLs; empty when nothing matches.
     */
    public function getLinks($sameDomainOnly = false)
    {
        if (!is_string($this->resource) || $this->resource === '') {
            return [];
        }

        // Pattern from https://stackoverflow.com/questions/36564293/extract-urls-from-a-string-using-php
        // The backslashes are significant: \b is a word boundary, \s is whitespace and
        // \( \) are literal parentheses. Without them the pattern matches nothing useful.
        $pattern = '#\bhttps?://[^,\s()<>]+(?:\([\w\d]+\)|([^,[:punct:]\s]|/))#';

        $count = preg_match_all($pattern, $this->resource, $match);
        if ($count === false || $count === 0) {
            return [];
        }

        $links = $match[0];
        if (!$sameDomainOnly) {
            return $links;
        }

        $wantedHost = $this->domainHost();
        if ($wantedHost === '') {
            // The configured domain has no recognisable host, so nothing can match it.
            return [];
        }

        $sameHost = array_filter($links, function ($link) use ($wantedHost) {
            $host = parse_url($link, PHP_URL_HOST);

            return is_string($host) && strcasecmp($host, $wantedHost) === 0;
        });

        // array_filter() preserves keys; reindex so callers get a plain list.
        return array_values($sameHost);
    }

    /**
     * Extracts the host part of the configured domain.
     *
     * @return string Lower-cased host, or an empty string when none can be determined.
     */
    private function domainHost()
    {
        $domain = trim((string) $this->domain);
        if ($domain === '') {
            return '';
        }

        // parse_url() only recognises a host when a scheme or a leading "//" is present.
        if (strpos($domain, '://') === false) {
            $domain = '//' . ltrim($domain, '/');
        }

        $host = parse_url($domain, PHP_URL_HOST);

        return is_string($host) ? strtolower($host) : '';
    }
}

The link in the code comment points to the question "Extract URL's from a string using PHP".

The resource is generated outside this class just using file_get_contents("https://stackoverflow.com");

And here is what I obtain

0 => "https://cdn.sstatic.net/Sites/stackoverflow/Img/favicon.ico?v=ec617d715196"
  1 => "https://cdn.sstatic.net/Sites/stackoverflow/Img/apple-touch-icon.png?v=c78bd457575a"
  2 => "https://cdn.sstatic.net/Sites/stackoverflow/Img/apple-touch-icon.png?v=c78bd457575a"
  3 => "https://stackoverflow.com/"/"
  4 => "https://cdn.sstatic.net/Sites/stackoverflow/Img/apple-touch-icon@2.png?v=73d79a89bded"
  5 => "https://ajax.googleapis.com/ajax/libs/jquery/1.12.4/jquery.min.js"
  6 => "https://cdn.sstatic.net/Js/stub.en.js?v=b6b81fff6fef"
  7 => "https://cdn.sstatic.net/Shared/stacks.css?v=18ec1a31c3b1"
  8 => "https://cdn.sstatic.net/Sites/stackoverflow/primary.css?v=1c3f5fb04641"
  9 => "https://cdn.sstatic.net/Shared/Product/product.css?v=e26bdc65a1f5"
  10 => "https://cdn.sstatic.net/Js/product-inview-animations.en.js?v=fd33cbb5526b"
  11 => "https://cdn.sstatic.net/Js/product-lazy-load-images.en.js?v=5f75ca13778c"
  12 => "https://cdn.sstatic.net/Shared/Channels/channels.css?v=89163e79f6d2"
  13 => "https://stackauth.com"
  14 => "https://meta.stackoverflow.com"
  15 => "https://cdn.sstatic.net/Img/stacks-icons"
  16 => "https://stacksnippets.net"
  17 => "https://www.opavote.com/results/"
  18 => "https://stackoverflow.com"
  19 => "https://stackoverflow.com/talent"
  20 => "https://stackoverflow.com/advertising"
  21 => "https://stackoverflow.com/users/login?ssrc=head&returnurl=https%3a%2f%2fstackoverflow.com%2f"
  22 => "https://stackoverflow.com/users/signup?ssrc=head&returnurl=%2fusers%2fstory%2fcurrent"
  23 => "https://stackoverflow.com"
  24 => "https://stackoverflow.com"
  25 => "https://stackoverflow.com/help"
  26 => "https://chat.stackoverflow.com/?tab=site&amp;host=stackoverflow.com"
  27 => "https://meta.stackoverflow.com"
  28 => "https://stackoverflow.com/users/signup?ssrc=site_switcher&amp;returnurl=%2fusers%2fstory%2fcurrent"
  29 => "https://stackoverflow.com/users/login?ssrc=site_switcher&amp;returnurl=https%3a%2f%2fstackoverflow.com%2f"
  30 => "https://stackexchange.com/sites"
  31 => "https://stackoverflow.blog"
  32 => "https://cdn.sstatic.net/Img/teams/teams-illo-free-sidebar-promo.svg?v=47faa659a05e"
  33 => "https://stackoverflow.com/teams/create/free?utm_source=so-owned&amp;utm_medium=side-bar&amp;utm_campaign=campaign-38&amp;utm_content=cta"
  34 => "https://stackoverflow.com/teams"
  35 => "https://stackoverflow.com/teams/create/free?utm_source=so-owned&amp;utm_medium=side-bar&amp;utm_campaign=campaign-38&amp;utm_content=cta"
  36 => "https://stackoverflow.com/teams"
  37 => "https://cdn.sstatic.net/Img/home/illo-code.svg?v=b7ee00fff9d8"
  38 => "https://cdn.sstatic.net/Img/home/illo-code.svg?v=b7ee00fff9d8"
  39 => "https://stackoverflow.com/teams/create/free"
  40 => "https://stackoverflow.com/teams"
  41 => "https://cdn.sstatic.net/Img/home/illo-public.svg?v=14bd5a506009"
  42 => "https://stackoverflow.com/teams"
  43 => "https://cdn.sstatic.net/Img/home/illo-teams.svg?v=7e543f14fcc0"
  44 => "https://stackoverflow.com/teams/create/free"
  45 => "https://stackoverflow.com/teams"
  46 => "https://cdn.sstatic.net/Img/product/teams/logos/box-alt.svg?v=eb76fd9d884f"
  47 => "https://cdn.sstatic.net/Img/product/teams/logos/expensify-alt.svg?v=09697472a3f5"
  48 => "https://cdn.sstatic.net/Img/product/teams/logos/intercom-alt.svg?v=3eda71aed47c"
  49 => "https://cdn.sstatic.net/Img/product/teams/logos/microsoft-alt.svg?v=e57319450314"
  50 => "https://cdn.sstatic.net/Img/product/teams/logos/bloomberg-alt.svg?v=9fc4f6650377"
  51 => "https://cdn.sstatic.net/Img/product/teams/logos/instacart-alt.svg?v=15bd0b39b197"
  52 => "https://cdn.sstatic.net/Img/product/teams/logos/barkbox-alt.svg?v=419890745024"
  53 => "https://cdn.sstatic.net/Img/product/teams/logos/logitech-alt.svg?v=a99c74b88566"
  54 => "https://cdn.sstatic.net/Img/product/teams/logos/overstock-alt.svg?v=ed38ea932870"
  55 => "https://cdn.sstatic.net/Img/product/teams/logos/chevron-alt.svg?v=3bfd2c06a64b"
  56 => "https://cdn.sstatic.net/Img/product/teams/logos/dialpad-alt.svg?v=4e63facf7f79"
  57 => "https://cdn.sstatic.net/Img/product/teams/logos/philips-alt.svg?v=7fc60c993103"
  58 => "https://cdn.sstatic.net/Img/product/teams/logos/siemens-alt.svg?v=cf0f5266d96b"
  59 => "https://cdn.sstatic.net/Img/product/teams/logos/verizon-media-alt.svg?v=f335b20096b2"
  60 => "https://cdn.sstatic.net/Img/product/teams/logos/wisetech-global-alt.svg?v=3b6b11e76536"
  61 => "https://cdn.sstatic.net/Img/product/teams/screens/illo-question.png?v=14c5863a5550"
  62 => "https://cdn.sstatic.net/Img/product/teams/screens/illo-question.png?v=14c5863a5550"
  63 => "https://cdn.sstatic.net/Img/product/teams/screens/illo-for-you.png?v=ab49238abe04"
  64 => "https://cdn.sstatic.net/Img/product/teams/screens/illo-for-you.png?v=ab49238abe04"
  65 => "https://cdn.sstatic.net/Img/product/teams/screens/illo-home-search.png?v=1ccd850cd929"
  66 => "https://cdn.sstatic.net/Img/product/teams/screens/illo-home-search.png?v=1ccd850cd929"
  67 => "https://stackoverflow.com/teams/create/free"
  68 => "https://stackoverflow.com/teams/create/basic"
  69 => "https://cdn.sstatic.net/Img/product/teams/illo-teams-pricing-shadow.svg?v=9954fe201b05"
  70 => "https://stackoverflow.com/teams/create/business"
  71 => "https://stackoverflow.com/enterprise/get-started"
  72 => "https://cdn.sstatic.net/Img/home/illo-integrations-left.png?v=0a97d470e180"
  73 => "https://cdn.sstatic.net/Img/home/illo-integrations-left.png?v=0a97d470e180"
  74 => "https://cdn.sstatic.net/Img/product/teams/microsoft-integration/microsoft-teams-logo.svg?v=8e1ff91711d7"
  75 => "https://cdn.sstatic.net/Img/home/illo-integrations-right.png?v=90c26b9154c7"
  76 => "https://cdn.sstatic.net/Img/home/illo-integrations-right.png?v=90c26b9154c7"
  77 => "https://apple.stackexchange.com"
  78 => "https://cdn.sstatic.net/Sites/apple/Img/apple-touch-icon.png?v=daa7ff1d953e"
  79 => "https://unix.stackexchange.com"
  80 => "https://cdn.sstatic.net/Sites/unix/Img/apple-touch-icon.png?v=5cf7fe716a89"
  81 => "https://ai.stackexchange.com"
  82 => "https://cdn.sstatic.net/Sites/ai/Img/apple-touch-icon.png?v=f14d741b295c"
  83 => "https://softwareengineering.stackexchange.com"
  84 => "https://cdn.sstatic.net/Sites/softwareengineering/Img/apple-touch-icon.png?v=5e581fc45e58"
  85 => "https://askubuntu.com/"
  86 => "https://cdn.sstatic.net/Sites/askubuntu/Img/apple-touch-icon.png?v=e16e1315edd6"
  87 => "https://salesforce.stackexchange.com"
  88 => "https://cdn.sstatic.net/Sites/salesforce/Img/apple-touch-icon.png?v=4c87c90207b3"
  89 => "https://cdn.sstatic.net/Img/home/illo-se.svg?v=f7e844293cc5"
  90 => "https://serverfault.com/"
  91 => "https://superuser.com/"
  92 => "https://stackexchange.com/sites"
  93 => "https://serverfault.com/"
  94 => "https://cdn.sstatic.net/Sites/serverfault/Img/apple-touch-icon.png?v=6c3100d858bb"
  95 => "https://superuser.com/"
  96 => "https://cdn.sstatic.net/Sites/superuser/Img/apple-touch-icon.png?v=0ad5b7a83e49"
  97 => "https://dba.stackexchange.com"
  98 => "https://cdn.sstatic.net/Sites/dba/Img/apple-touch-icon.png?v=cdcd5ff7b29e"
  99 => "https://quantumcomputing.stackexchange.com"
  100 => "https://cdn.sstatic.net/Sites/quantumcomputing/Img/apple-touch-icon.png?v=2af3cc2b87e8"
  101 => "https://gamedev.stackexchange.com"
  102 => "https://cdn.sstatic.net/Sites/gamedev/Img/apple-touch-icon.png?v=0cfb55927bd2"
  103 => "https://networkengineering.stackexchange.com"
  104 => "https://cdn.sstatic.net/Sites/networkengineering/Img/apple-touch-icon.png?v=d66b1118cec8"
  105 => "https://cdn.sstatic.net/Img/home/illo-se.svg?v=f7e844293cc5"
  106 => "https://stackoverflow.com/teams/create/free"
  107 => "https://stackoverflow.com"
  108 => "https://stackoverflow.com"
  109 => "https://stackoverflow.com/jobs"
  110 => "https://stackoverflow.com/jobs/directory/developer-jobs"
  111 => "https://stackoverflow.com/jobs/salary"
  112 => "https://stackoverflowbusiness.com"
  113 => "https://stackoverflow.com/teams"
  114 => "https://stackoverflow.com/talent"
  115 => "https://stackoverflow.com/advertising"
  116 => "https://stackoverflow.com/enterprise"
  117 => "https://stackoverflow.com/company"
  118 => "https://stackoverflow.com/company"
  119 => "https://stackoverflow.com/company/press"
  120 => "https://stackoverflow.com/company/work-here"
  121 => "https://stackoverflow.com/legal"
  122 => "https://stackoverflow.com/legal/privacy-policy"
  123 => "https://stackoverflow.com/legal/terms-of-service"
  124 => "https://stackoverflow.com/company/contact"
  125 => "https://stackoverflow.com/legal/cookie-policy"
  126 => "https://stackexchange.com"
  127 => "https://stackoverflow.com"
  128 => "https://serverfault.com"
  129 => "https://superuser.com"
  130 => "https://webapps.stackexchange.com"
  131 => "https://askubuntu.com"
  132 => "https://webmasters.stackexchange.com"
  133 => "https://gamedev.stackexchange.com"
  134 => "https://tex.stackexchange.com"
  135 => "https://softwareengineering.stackexchange.com"
  136 => "https://unix.stackexchange.com"
  137 => "https://apple.stackexchange.com"
  138 => "https://wordpress.stackexchange.com"
  139 => "https://gis.stackexchange.com"
  140 => "https://electronics.stackexchange.com"
  141 => "https://android.stackexchange.com"
  142 => "https://security.stackexchange.com"
  143 => "https://dba.stackexchange.com"
  144 => "https://drupal.stackexchange.com"
  145 => "https://sharepoint.stackexchange.com"
  146 => "https://ux.stackexchange.com"
  147 => "https://mathematica.stackexchange.com"
  148 => "https://salesforce.stackexchange.com"
  149 => "https://expressionengine.stackexchange.com"
  150 => "https://pt.stackoverflow.com"
  151 => "https://blender.stackexchange.com"
  152 => "https://networkengineering.stackexchange.com"
  153 => "https://crypto.stackexchange.com"
  154 => "https://codereview.stackexchange.com"
  155 => "https://magento.stackexchange.com"
  156 => "https://softwarerecs.stackexchange.com"
  157 => "https://dsp.stackexchange.com"
  158 => "https://emacs.stackexchange.com"
  159 => "https://raspberrypi.stackexchange.com"
  160 => "https://ru.stackoverflow.com"
  161 => "https://codegolf.stackexchange.com"
  162 => "https://es.stackoverflow.com"
  163 => "https://ethereum.stackexchange.com"
  164 => "https://datascience.stackexchange.com"
  165 => "https://arduino.stackexchange.com"
  166 => "https://bitcoin.stackexchange.com"
  167 => "https://sqa.stackexchange.com"
  168 => "https://sound.stackexchange.com"
  169 => "https://windowsphone.stackexchange.com"
  170 => "https://stackexchange.com/sites#technology"
  171 => "https://photo.stackexchange.com"
  172 => "https://scifi.stackexchange.com"
  173 => "https://graphicdesign.stackexchange.com"
  174 => "https://movies.stackexchange.com"
  175 => "https://music.stackexchange.com"
  176 => "https://worldbuilding.stackexchange.com"
  177 => "https://video.stackexchange.com"
  178 => "https://cooking.stackexchange.com"
  179 => "https://diy.stackexchange.com"
  180 => "https://money.stackexchange.com"
  181 => "https://academia.stackexchange.com"
  182 => "https://law.stackexchange.com"
  183 => "https://fitness.stackexchange.com"
  184 => "https://gardening.stackexchange.com"
  185 => "https://parenting.stackexchange.com"
  186 => "https://stackexchange.com/sites#lifearts"
  187 => "https://english.stackexchange.com"
  188 => "https://skeptics.stackexchange.com"
  189 => "https://judaism.stackexchange.com"
  190 => "https://travel.stackexchange.com"
  191 => "https://christianity.stackexchange.com"
  192 => "https://ell.stackexchange.com"
  193 => "https://japanese.stackexchange.com"
  194 => "https://chinese.stackexchange.com"
  195 => "https://french.stackexchange.com"
  196 => "https://german.stackexchange.com"
  197 => "https://hermeneutics.stackexchange.com"
  198 => "https://history.stackexchange.com"
  199 => "https://spanish.stackexchange.com"
  200 => "https://islam.stackexchange.com"
  201 => "https://rus.stackexchange.com"
  202 => "https://russian.stackexchange.com"
  203 => "https://gaming.stackexchange.com"
  204 => "https://bicycles.stackexchange.com"
  205 => "https://rpg.stackexchange.com"
  206 => "https://anime.stackexchange.com"
  207 => "https://puzzling.stackexchange.com"
  208 => "https://mechanics.stackexchange.com"
  209 => "https://boardgames.stackexchange.com"
  210 => "https://bricks.stackexchange.com"
  211 => "https://homebrew.stackexchange.com"
  212 => "https://martialarts.stackexchange.com"
  213 => "https://outdoors.stackexchange.com"
  214 => "https://poker.stackexchange.com"
  215 => "https://chess.stackexchange.com"
  216 => "https://sports.stackexchange.com"
  217 => "https://stackexchange.com/sites#culturerecreation"
  218 => "https://mathoverflow.net"
  219 => "https://math.stackexchange.com"
  220 => "https://stats.stackexchange.com"
  221 => "https://cstheory.stackexchange.com"
  222 => "https://physics.stackexchange.com"
  223 => "https://chemistry.stackexchange.com"
  224 => "https://biology.stackexchange.com"
  225 => "https://cs.stackexchange.com"
  226 => "https://philosophy.stackexchange.com"
  227 => "https://linguistics.stackexchange.com"
  228 => "https://psychology.stackexchange.com"
  229 => "https://scicomp.stackexchange.com"
  230 => "https://stackexchange.com/sites#science"
  231 => "https://meta.stackexchange.com"
  232 => "https://stackapps.com"
  233 => "https://api.stackexchange.com"
  234 => "https://data.stackexchange.com"
  235 => "https://stackoverflow.blog?blb=1"
  236 => "https://www.facebook.com/officialstackoverflow/"
  237 => "https://twitter.com/stackoverflow"
  238 => "https://linkedin.com/company/stack-overflow"
  239 => "https://www.instagram.com/thestackoverflow"
  240 => "https://stackoverflow.com/help/licensing"
  241 => "https://pixel.quantserve.com/pixel/p-c1rF4kxgLUzNc.gif"
  242 => "https://www.google-analytics.com/analytics.js"
  243 => "https://stackoverflow.com/legal/cookie-policy"

Now I want to modify my regex so that it only returns URLs from one domain, for example "https://stackoverflow.com".

I tried preg_match_all('#\b'+$this->domain+'?://[^,\s()<>]+(?:\([\w\d]+\)|([^,[:punct:]\s]|/))#', $this->resource, $match); but it gives me:

Warning: A non-numeric value encountered

Source: Ask PHP

LEAVE A COMMENT